In [1]:
import pandas as pd
import numpy as np

pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000

In [2]:
train_data = pd.read_csv('../input/gamma-ray-identification/train_processed.csv')
test_data = pd.read_csv('../input/gamma-ray-identification/test_processed.csv')
submission_data = pd.read_csv('../input/gamma-ray-identification/CAX_LogFacies_Submission_File.csv')


In [3]:
train_data['GR_rate'] = train_data.groupby(['well_id'])['GR'].shift(1)
train_data.GR_rate = (train_data.GR - train_data.GR_rate).fillna(0)
train_data.GR_rate = train_data.GR_rate/train_data.GR

In [4]:
test_data['GR_rate'] = test_data.groupby(['well_id'])['GR'].shift(1)
test_data.GR_rate = (test_data.GR - test_data.GR_rate).fillna(0)
test_data.GR_rate = test_data.GR_rate/test_data.GR

In [5]:
train_data.head(5)

Unnamed: 0,row_id,well_id,GR,label,processed_GR,processed_GR2,GR_rate
0,0,0,143.51,0,0.816197,1.042664,0.0
1,1,0,112.790928,0,0.609702,0.066697,-0.272354
2,2,0,123.531856,0,0.681903,0.407944,0.086949
3,3,0,111.692784,0,0.60232,0.031808,-0.105997
4,4,0,123.613712,0,0.682453,0.410545,0.096437


In [6]:
test_data.head(5)

Unnamed: 0,unique_id,row_id,well_id,GR,processed_GR,processed_GR2,GR_rate
0,CAX_0,0,5000,113.95,0.599852,-0.297544,0.0
1,CAX_1,1,5000,120.896397,0.64188,-0.092392,0.057457
2,CAX_2,2,5000,115.342793,0.608279,-0.25641,-0.048149
3,CAX_3,3,5000,118.85919,0.629554,-0.152558,0.029585
4,CAX_4,4,5000,127.735587,0.68326,0.109593,0.06949


In [7]:
submission_data.head(5)

Unnamed: 0,unique_id,label
0,CAX_0,
1,CAX_1,
2,CAX_2,
3,CAX_3,
4,CAX_4,


In [8]:
#!pip install keras_self_attention

In [9]:
from keras.models import Sequential, Model
from keras.layers import TimeDistributed, Dense, Embedding, LSTM, SpatialDropout1D, Input, InputLayer, Bidirectional, Conv1D, Conv2D, MaxPooling1D, MaxPooling2D
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.models import load_model
from keras_self_attention import SeqSelfAttention
from keras import optimizers

Using TensorFlow backend.


In [10]:
max_len = 110 #train_data[train_data.well_id == 0].shape[0]
n_output = train_data.label.nunique()
num_train_well = train_data.well_id.nunique()
num_test_well = test_data.well_id.nunique()

print (max_len, n_output)

110 5


In [11]:
import tensorflow as tf
import keras.backend as K

def f1(y_true, y_pred):
    y_pred = K.round(y_pred)
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    # tn = K.sum(K.cast((1-y_true)*(1-y_pred), 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2*p*r / (p+r+K.epsilon())
    #f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return K.mean(f1)

In [12]:
def get_simple_model():
    model = Sequential()
    model.add(InputLayer(input_shape=(max_len,1)))
    model.add(Conv1D(50,20,padding='same',activation='linear',strides=1))
    model.add(Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True)))
    #model.add(SeqSelfAttention(attention_width=50,attention_activation='sigmoid',name='Attention'))
    model.add(TimeDistributed(Dense(n_output,activation='softmax')))
    model.compile(loss = 'categorical_crossentropy', optimizer="adam",metrics = ['accuracy'])
    #print(model.summary())
    return model

In [13]:
def get_simple_cnn_model():
    model = Sequential()
    model.add(InputLayer(input_shape=(max_len,1)))
    model.add(Conv1D(50,20,padding='same',activation='linear',strides=1))
    model.add(Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True)))
    #model.add(SeqSelfAttention(attention_width=None,attention_activation='sigmoid',name='Attention'))
    model.add(TimeDistributed(Dense(n_output,activation='softmax')))
    model.compile(loss = 'categorical_crossentropy', optimizer="adam",metrics = ['accuracy'])
    #print(model.summary())
    return model

In [14]:
def get_model():
    model = Sequential()
    model.add(InputLayer(input_shape=(max_len,1)))
    model.add(Conv1D(50,20,padding='same',activation='linear',strides=1))
    #model.add(Bidirectional(LSTM(200, dropout=0.2, recurrent_dropout=0.2, return_sequences = True)))
    model.add(Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True)))
    model.add(Conv1D(50,20,padding='same',activation='linear',strides=1))
    model.add(Bidirectional(LSTM(50, dropout=0.2, recurrent_dropout=0.2, return_sequences = True)))
    model.add(SeqSelfAttention(attention_width=50,attention_activation='sigmoid',name='Attention'))
    model.add(Dense(n_output,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer="adam",metrics = ['accuracy'])
    #print(model.summary())
    return model

In [15]:
model = get_simple_model()
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 1100, 50)          1050      
_________________________________________________________________
bidirectional_1 (Bidirection (None, 1100, 200)         120800    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 1100, 5)           1005      
Total params: 122,855
Trainable params: 122,855
Non-trainable params: 0
_________________________________________________________________


In [15]:
from sklearn.model_selection import GroupKFold, KFold
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau 

In [17]:
X = train_data.GR_rate.values.reshape(num_train_well*10,max_len,1)
y = pd.get_dummies(train_data.label).values.reshape(num_train_well*10,max_len,n_output)

In [18]:
y[0]

array([[1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0,

In [19]:
batch_size = 32
n_epochs = 50

In [20]:
iter = 0
all_history = {}

for train_index, test_index in KFold(n_splits=5).split(X,y):
    iter += 1
    
    print ("Iteration {}".format(iter))
    
    train_x = X[train_index]
    train_y = y[train_index]
    val_x = X[test_index]
    val_y = y[test_index]
    
    #print (train_x.shape, train_y.shape, val_x.shape, val_y.shape)
    model = get_simple_model()
    #model.load_weights("weights1.hdf5")
    early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=10, verbose=0, mode='max', baseline=None, restore_best_weights=False)
    lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.000001)
    checkpointer = ModelCheckpoint(monitor='val_accuracy',filepath='../working/weights_simple_lstm_{}.hdf5'.format(iter), mode='max',verbose=1, save_best_only=True)
    
    history = model.fit(train_x, train_y, epochs = n_epochs, batch_size=batch_size, verbose = 1, validation_data=(val_x,val_y), callbacks=[early,lr,checkpointer])
    all_history[iter] = history
    
    break

Iteration 1
Train on 3200 samples, validate on 800 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [22]:
model.save_weights('../working/weights_simple_lstm_1.hdf5')

In [20]:
model = get_simple_cnn_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_2 (Conv1D)            (None, 1100, 50)          1050      
_________________________________________________________________
bidirectional_2 (Bidirection (None, 1100, 200)         120800    
_________________________________________________________________
time_distributed_2 (TimeDist (None, 1100, 5)           1005      
Total params: 122,855
Trainable params: 122,855
Non-trainable params: 0
_________________________________________________________________


In [32]:
iter = 0
n_epochs = 20
all_history = {}

for train_index, test_index in KFold(n_splits=5).split(X,y):
    iter += 1
    
    print ("Iteration {}".format(iter))
    
    train_x = X[train_index]
    train_y = y[train_index]
    val_x = X[test_index]
    val_y = y[test_index]
    
    #print (train_x.shape, train_y.shape, val_x.shape, val_y.shape)
    model = get_simple_cnn_model()
    #model.load_weights("weights1.hdf5")
    early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=0, mode='max', baseline=None, restore_best_weights=False)
    lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.000001)
    checkpointer = ModelCheckpoint(monitor='val_acc',filepath='../working/weights_simple_lstm_cnn_{}.hdf5'.format(iter), mode='max',verbose=1, save_best_only=True)
    
    history = model.fit(train_x, train_y, epochs = n_epochs, batch_size=batch_size, verbose = 1, validation_data=(val_x,val_y), callbacks=[early,lr,checkpointer])
    all_history[iter] = history
    
    break

Iteration 1
Train on 3200 samples, validate on 800 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.93348, saving model to ../working/weights_simple_lstm_cnn_1.hdf5
Epoch 2/20

Epoch 00002: val_acc improved from 0.93348 to 0.93588, saving model to ../working/weights_simple_lstm_cnn_1.hdf5
Epoch 3/20

Epoch 00003: val_acc improved from 0.93588 to 0.93815, saving model to ../working/weights_simple_lstm_cnn_1.hdf5
Epoch 4/20

Epoch 00004: val_acc improved from 0.93815 to 0.93873, saving model to ../working/weights_simple_lstm_cnn_1.hdf5
Epoch 5/20

Epoch 00005: val_acc improved from 0.93873 to 0.94145, saving model to ../working/weights_simple_lstm_cnn_1.hdf5
Epoch 6/20

Epoch 00006: val_acc improved from 0.94145 to 0.94232, saving model to ../working/weights_simple_lstm_cnn_1.hdf5
Epoch 7/20

Epoch 00007: val_acc improved from 0.94232 to 0.94361, saving model to ../working/weights_simple_lstm_cnn_1.hdf5
Epoch 8/20

Epoch 00008: val_acc improved from 0.94361 to 0.94522, sa

In [22]:
from keras.layers import Concatenate, BatchNormalization

In [20]:
def get_deep_cnn_model():
    input = Input((max_len,2))
    cnn1 = Conv1D(50,10,padding='same',activation='linear',strides=1)(input)
    cnn2 = Conv1D(50,50,padding='same',activation='linear',strides=1)(input)
    cnn3 = Conv1D(50,100,padding='same',activation='linear',strides=1)(input)
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True, stateful=True))(Concatenate(axis=-1)([cnn1,cnn2,cnn3]))
    out = TimeDistributed(Dense(n_output,activation='softmax'))(out)
    model = Model(input,out)
    model.compile(loss = 'categorical_crossentropy', optimizer="nadam",metrics = ['accuracy',f1])
    #print(model.summary())
    return model

In [21]:
model = get_deep_cnn_model()
model.summary()

Instructions for updating:
Colocations handled automatically by placer.


NameError: name 'Concatenate' is not defined

In [37]:
iter = 0
n_epochs = 25
all_history = {}

for train_index, test_index in KFold(n_splits=5).split(X,y):
    iter += 1
    
    print ("Iteration {}".format(iter))
    
    train_x = X[train_index]
    train_y = y[train_index]
    val_x = X[test_index]
    val_y = y[test_index]
    
    #print (train_x.shape, train_y.shape, val_x.shape, val_y.shape)
    model = get_deep_cnn_model()
    #model.load_weights("weights1.hdf5")
    early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=0, mode='max', baseline=None, restore_best_weights=False)
    lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.000001)
    checkpointer = ModelCheckpoint(monitor='val_acc',filepath='../working/weights_simple_lstm_deep_cnn_{}.hdf5'.format(iter), mode='max',verbose=1, save_best_only=True)
    
    history = model.fit(train_x, train_y, epochs = n_epochs, batch_size=batch_size, verbose = 1, validation_data=(val_x,val_y), callbacks=[early,lr,checkpointer])
    all_history[iter] = history
    
    break

Iteration 1
Train on 3200 samples, validate on 800 samples
Epoch 1/25

Epoch 00001: val_acc improved from -inf to 0.68608, saving model to ../working/weights_simple_lstm_deep_cnn_1.hdf5
Epoch 2/25

Epoch 00002: val_acc improved from 0.68608 to 0.83221, saving model to ../working/weights_simple_lstm_deep_cnn_1.hdf5
Epoch 3/25

Epoch 00003: val_acc improved from 0.83221 to 0.88063, saving model to ../working/weights_simple_lstm_deep_cnn_1.hdf5
Epoch 4/25

Epoch 00004: val_acc improved from 0.88063 to 0.90265, saving model to ../working/weights_simple_lstm_deep_cnn_1.hdf5
Epoch 5/25

Epoch 00005: val_acc improved from 0.90265 to 0.91941, saving model to ../working/weights_simple_lstm_deep_cnn_1.hdf5
Epoch 6/25

Epoch 00006: val_acc improved from 0.91941 to 0.92737, saving model to ../working/weights_simple_lstm_deep_cnn_1.hdf5
Epoch 7/25

Epoch 00007: val_acc improved from 0.92737 to 0.93261, saving model to ../working/weights_simple_lstm_deep_cnn_1.hdf5
Epoch 8/25

Epoch 00008: val_acc i

In [24]:
def get_deep_cnn_lstm_cnn_model():
    input = Input((max_len,1))
    cnn1 = Conv1D(50,10,padding='same',activation='linear',strides=1)(input)
    cnn2 = Conv1D(50,50,padding='same',activation='linear',strides=1)(input)
    cnn3 = Conv1D(50,100,padding='same',activation='linear',strides=1)(input)
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(Concatenate(axis=-1)([cnn1,cnn2,cnn3]))
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(out)
    cnn4 = Conv1D(50,10,padding='same',activation='linear',strides=1)(out)
    cnn5 = Conv1D(50,50,padding='same',activation='linear',strides=1)(out)
    cnn6 = Conv1D(50,100,padding='same',activation='linear',strides=1)(out)
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(Concatenate(axis=-1)([cnn4,cnn5,cnn6]))
    out = TimeDistributed(Dense(n_output,activation='softmax'))(out)
    model = Model(input,out)
    model.compile(loss = 'categorical_crossentropy', optimizer="adam",metrics = ['accuracy'])
    #print(model.summary())
    return model

In [25]:
model = get_deep_cnn_lstm_cnn_model()
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 1100, 1)      0                                            
__________________________________________________________________________________________________
conv1d_6 (Conv1D)               (None, 1100, 50)     550         input_4[0][0]                    
__________________________________________________________________________________________________
conv1d_7 (Conv1D)               (None, 1100, 50)     2550        input_4[0][0]                    
__________________________________________________________________________________________________
conv1d_8 (Conv1D)               (None, 1100, 50)     5050        input_4[0][0]                    
__________________________________________________________________________________________________
concatenat

In [45]:
iter = 0
n_epochs = 25
all_history = {}

for train_index, test_index in KFold(n_splits=5).split(X,y):
    iter += 1
    
    print ("Iteration {}".format(iter))
    
    train_x = X[train_index]
    train_y = y[train_index]
    val_x = X[test_index]
    val_y = y[test_index]
    
    #print (train_x.shape, train_y.shape, val_x.shape, val_y.shape)
    model = get_deep_cnn_lstm_cnn_model()
    model.load_weights("../working/weights_simple_lstm_deep_cnn_lstm_1.hdf5")
    early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=0, mode='max', baseline=None, restore_best_weights=False)
    lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.000001)
    checkpointer = ModelCheckpoint(monitor='val_acc',filepath='../working/weights_simple_lstm_deep_cnn_lstm_{}.hdf5'.format(iter), mode='max',verbose=1, save_best_only=True)
    
    history = model.fit(train_x, train_y, epochs = n_epochs, batch_size=batch_size, verbose = 1, validation_data=(val_x,val_y), callbacks=[early,lr,checkpointer])
    all_history[iter] = history
    
    break

Iteration 1
Train on 3200 samples, validate on 800 samples
Epoch 1/25

Epoch 00001: val_acc improved from -inf to 0.96137, saving model to ../working/weights_simple_lstm_deep_cnn_lstm_1.hdf5
Epoch 2/25

Epoch 00002: val_acc did not improve from 0.96137
Epoch 3/25

Epoch 00003: val_acc did not improve from 0.96137
Epoch 4/25

Epoch 00004: val_acc improved from 0.96137 to 0.96156, saving model to ../working/weights_simple_lstm_deep_cnn_lstm_1.hdf5
Epoch 5/25

Epoch 00005: val_acc did not improve from 0.96156
Epoch 6/25

Epoch 00006: val_acc did not improve from 0.96156
Epoch 7/25

Epoch 00007: val_acc improved from 0.96156 to 0.96248, saving model to ../working/weights_simple_lstm_deep_cnn_lstm_1.hdf5
Epoch 8/25

Epoch 00008: val_acc improved from 0.96248 to 0.96277, saving model to ../working/weights_simple_lstm_deep_cnn_lstm_1.hdf5
Epoch 9/25

Epoch 00009: val_acc did not improve from 0.96277
Epoch 10/25

Epoch 00010: val_acc improved from 0.96277 to 0.96291, saving model to ../working

KeyboardInterrupt: 

In [26]:
def get_deep_cnn_model2():
    input = Input((max_len,2))
    cnn1 = Conv1D(50,10,padding='same',activation='linear',strides=1)(input)
    cnn1 = BatchNormalization(axis=-1)(cnn1)
    cnn2 = Conv1D(50,50,padding='same',activation='linear',strides=1)(input)
    cnn2 = BatchNormalization(axis=-1)(cnn2)
    cnn3 = Conv1D(50,100,padding='same',activation='linear',strides=1)(input)
    cnn3 = BatchNormalization(axis=-1)(cnn3)
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(Concatenate(axis=-1)([cnn1,cnn2,cnn3]))
    out = TimeDistributed(Dense(n_output,activation='softmax'))(out)
    model = Model(input,out)
    model.compile(loss = 'categorical_crossentropy', optimizer="nadam",metrics = ['accuracy',f1])
    #print(model.summary())
    return model

In [27]:
model = get_deep_cnn_model2()
model.summary()

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 110, 2)       0                                            
__________________________________________________________________________________________________
conv1d_7 (Conv1D)               (None, 110, 50)      1050        input_3[0][0]                    
__________________________________________________________________________________________________
conv1d_8 (Conv1D)               (None, 110, 50)      5050        input_3[0][0]                    
__________________________________________________________________________________________________
conv1d_9 (Conv1D)               (None, 110, 50)      10050       input_3[0][0]               

In [29]:
iter = 0
n_epochs = 25
all_history = {}

for train_index, test_index in KFold(n_splits=5).split(X,y):
    iter += 1
    
    print ("Iteration {}".format(iter))
    
    train_x = train_data[['processed_GR2','GR_rate']].values.reshape(num_train_well*10,max_len,2)[train_index]
    train_y = y[train_index]
    val_x = train_data[['processed_GR2','GR_rate']].values.reshape(num_train_well*10,max_len,2)[test_index]
    val_y = y[test_index]
    
    print (train_x.shape, train_y.shape, val_x.shape, val_y.shape)
    

Iteration 1
(32000, 110, 2) (32000, 110, 5) (8000, 110, 2) (8000, 110, 5)
Iteration 2
(32000, 110, 2) (32000, 110, 5) (8000, 110, 2) (8000, 110, 5)
Iteration 3
(32000, 110, 2) (32000, 110, 5) (8000, 110, 2) (8000, 110, 5)
Iteration 4
(32000, 110, 2) (32000, 110, 5) (8000, 110, 2) (8000, 110, 5)
Iteration 5
(32000, 110, 2) (32000, 110, 5) (8000, 110, 2) (8000, 110, 5)


In [30]:
model = get_deep_cnn_model2()
#model.load_weights("weights1.hdf5")
early = EarlyStopping(monitor='val_f1', min_delta=0, patience=7, verbose=0, mode='max', baseline=None, restore_best_weights=False)
lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.000001)
checkpointer = ModelCheckpoint(monitor='val_f1',filepath='../working/weights_simple_lstm_deep_cnn2_{}.hdf5'.format(iter), mode='max',verbose=1, save_best_only=True)

history = model.fit(train_x, train_y, epochs = n_epochs, batch_size=batch_size, verbose = 1, validation_data=(val_x,val_y), callbacks=[early,lr,checkpointer])
all_history[iter] = history

Instructions for updating:
Use tf.cast instead.
Train on 32000 samples, validate on 8000 samples
Epoch 1/25

Epoch 00001: val_f1 improved from -inf to 0.85073, saving model to ../working/weights_simple_lstm_deep_cnn2_5.hdf5
Epoch 2/25

Epoch 00002: val_f1 improved from 0.85073 to 0.86149, saving model to ../working/weights_simple_lstm_deep_cnn2_5.hdf5
Epoch 3/25

Epoch 00003: val_f1 did not improve from 0.86149
Epoch 4/25

Epoch 00004: val_f1 improved from 0.86149 to 0.87140, saving model to ../working/weights_simple_lstm_deep_cnn2_5.hdf5
Epoch 5/25

Epoch 00005: val_f1 improved from 0.87140 to 0.87342, saving model to ../working/weights_simple_lstm_deep_cnn2_5.hdf5
Epoch 6/25

Epoch 00006: val_f1 improved from 0.87342 to 0.87880, saving model to ../working/weights_simple_lstm_deep_cnn2_5.hdf5
Epoch 7/25

Epoch 00007: val_f1 did not improve from 0.87880
Epoch 8/25

Epoch 00008: val_f1 did not improve from 0.87880
Epoch 9/25

Epoch 00009: val_f1 did not improve from 0.87880
Epoch 10/25


In [209]:
iter = 0
n_epochs = 25
all_history = {}

for train_index, test_index in KFold(n_splits=5).split(X,y):
    iter += 1
    
    print ("Iteration {}".format(iter))
    
    train_x = train_data[['GR','GR_rate']].values.reshape(num_train_well,max_len,2)[train_index]
    train_y = y[train_index]
    val_x = train_data[['GR','GR_rate']].values.reshape(num_train_well,max_len,2)[test_index]
    val_y = y[test_index]
    
    #print (train_x.shape, train_y.shape, val_x.shape, val_y.shape)
    model = get_deep_cnn_model2()
    #model.load_weights("weights1.hdf5")
    early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=0, mode='max', baseline=None, restore_best_weights=False)
    lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.000001)
    checkpointer = ModelCheckpoint(monitor='val_acc',filepath='../working/weights_simple_lstm_deep_cnn3_{}.hdf5'.format(iter), mode='max',verbose=1, save_best_only=True)
    
    history = model.fit(train_x, train_y, epochs = n_epochs, batch_size=batch_size, verbose = 1, validation_data=(val_x,val_y), callbacks=[early,lr,checkpointer])
    all_history[iter] = history
    
    break

Iteration 1
Train on 3200 samples, validate on 800 samples
Epoch 1/25

Epoch 00001: val_acc improved from -inf to 0.22426, saving model to ../working/weights_simple_lstm_deep_cnn3_1.hdf5
Epoch 2/25

Epoch 00002: val_acc improved from 0.22426 to 0.43610, saving model to ../working/weights_simple_lstm_deep_cnn3_1.hdf5
Epoch 3/25

Epoch 00003: val_acc improved from 0.43610 to 0.90582, saving model to ../working/weights_simple_lstm_deep_cnn3_1.hdf5
Epoch 4/25

Epoch 00004: val_acc did not improve from 0.90582
Epoch 5/25

Epoch 00005: val_acc improved from 0.90582 to 0.93549, saving model to ../working/weights_simple_lstm_deep_cnn3_1.hdf5
Epoch 6/25

Epoch 00006: val_acc improved from 0.93549 to 0.94103, saving model to ../working/weights_simple_lstm_deep_cnn3_1.hdf5
Epoch 7/25

Epoch 00007: val_acc did not improve from 0.94103
Epoch 8/25

Epoch 00008: val_acc improved from 0.94103 to 0.94521, saving model to ../working/weights_simple_lstm_deep_cnn3_1.hdf5
Epoch 9/25

Epoch 00009: val_acc d

In [28]:
def get_deep_cnn_dense_model2():
    input = Input((max_len,2))
    cnn1 = Conv1D(50,10,padding='same',activation='linear',strides=1)(input)
    cnn1 = BatchNormalization(axis=-1)(cnn1)
    cnn2 = Conv1D(50,50,padding='same',activation='linear',strides=1)(input)
    cnn2 = BatchNormalization(axis=-1)(cnn2)
    cnn3 = Conv1D(50,100,padding='same',activation='linear',strides=1)(input)
    cnn3 = BatchNormalization(axis=-1)(cnn3)
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(Concatenate(axis=-1)([cnn1,cnn2,cnn3]))
    out = TimeDistributed(Dense(50))(out)
    out = TimeDistributed(Dense(n_output,activation='softmax'))(out)
    model = Model(input,out)
    model.compile(loss = 'categorical_crossentropy', optimizer="adam",metrics = ['accuracy'])
    #print(model.summary())
    return model

In [29]:
model = get_deep_cnn_dense_model2()
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 1100, 2)      0                                            
__________________________________________________________________________________________________
conv1d_15 (Conv1D)              (None, 1100, 50)     1050        input_6[0][0]                    
__________________________________________________________________________________________________
conv1d_16 (Conv1D)              (None, 1100, 50)     5050        input_6[0][0]                    
__________________________________________________________________________________________________
conv1d_17 (Conv1D)              (None, 1100, 50)     10050       input_6[0][0]                    
__________________________________________________________________________________________________
batch_norm

In [121]:
iter = 0
n_epochs = 50
all_history = {}

for train_index, test_index in KFold(n_splits=5).split(X,y):
    iter += 1
    
    print ("Iteration {}".format(iter))
    
    train_x = train_data[['processed_GR2','GR_rate']].values.reshape(num_train_well,max_len,2)[train_index]
    train_y = y[train_index]
    val_x = train_data[['processed_GR2','GR_rate']].values.reshape(num_train_well,max_len,2)[test_index]
    val_y = y[test_index]
    
    #print (train_x.shape, train_y.shape, val_x.shape, val_y.shape)
    model = get_deep_cnn_dense_model2()
    #model.load_weights("weights1.hdf5")
    early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=0, mode='max', baseline=None, restore_best_weights=False)
    lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.000001)
    checkpointer = ModelCheckpoint(monitor='val_acc',filepath='../working/weights_simple_lstm_deep_cnn2_dense_{}.hdf5'.format(iter), mode='max',verbose=1, save_best_only=True)
    
    history = model.fit(train_x, train_y, epochs = n_epochs, batch_size=batch_size, verbose = 1, validation_data=(val_x,val_y), callbacks=[early,lr,checkpointer])
    all_history[iter] = history
    
    break

Iteration 1
Train on 3200 samples, validate on 800 samples
Epoch 1/50

Epoch 00001: val_acc improved from -inf to 0.91006, saving model to ../working/weights_simple_lstm_deep_cnn2_dense_1.hdf5
Epoch 2/50

Epoch 00002: val_acc improved from 0.91006 to 0.92919, saving model to ../working/weights_simple_lstm_deep_cnn2_dense_1.hdf5
Epoch 3/50

Epoch 00003: val_acc improved from 0.92919 to 0.93705, saving model to ../working/weights_simple_lstm_deep_cnn2_dense_1.hdf5
Epoch 4/50

Epoch 00004: val_acc improved from 0.93705 to 0.94071, saving model to ../working/weights_simple_lstm_deep_cnn2_dense_1.hdf5
Epoch 5/50

Epoch 00005: val_acc improved from 0.94071 to 0.94431, saving model to ../working/weights_simple_lstm_deep_cnn2_dense_1.hdf5
Epoch 6/50

Epoch 00006: val_acc improved from 0.94431 to 0.94706, saving model to ../working/weights_simple_lstm_deep_cnn2_dense_1.hdf5
Epoch 7/50

Epoch 00007: val_acc improved from 0.94706 to 0.94956, saving model to ../working/weights_simple_lstm_deep_cnn

In [30]:
def get_attn_model2():
    input = Input((max_len,2))
    cnn1 = Conv1D(50,10,padding='same',activation='linear',strides=1)(input)
    cnn1 = BatchNormalization(axis=-1)(cnn1)
    cnn2 = Conv1D(50,50,padding='same',activation='linear',strides=1)(input)
    cnn2 = BatchNormalization(axis=-1)(cnn2)
    cnn3 = Conv1D(50,100,padding='same',activation='linear',strides=1)(input)
    cnn3 = BatchNormalization(axis=-1)(cnn3)
    
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(Concatenate(axis=-1)([cnn1,cnn2,cnn3]))
    
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(out)
    
    cnn4 = Conv1D(50,10,padding='same',activation='linear',strides=1)(out)
    cnn4 = BatchNormalization(axis=-1)(cnn4)
    cnn5 = Conv1D(50,50,padding='same',activation='linear',strides=1)(out)
    cnn5 = BatchNormalization(axis=-1)(cnn5)
    cnn6 = Conv1D(50,100,padding='same',activation='linear',strides=1)(out)
    cnn6 = BatchNormalization(axis=-1)(cnn6)
    
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(Concatenate(axis=-1)([cnn4,cnn5,cnn6]))
    attn = SeqSelfAttention(units=20,attention_width=20,attention_activation='sigmoid',name='Attention1')(out)
    out = TimeDistributed(Dense(n_output,activation='softmax'))(Concatenate(axis=-1)([out,attn]))
    model = Model(input,out)
    model.compile(loss = 'categorical_crossentropy', optimizer="adam",metrics = ['accuracy'])
    #print(model.summary())
    return model

In [31]:
def get_multout_model():
    input = Input((max_len,2))
    cnn1 = Conv1D(50,10,padding='same',activation='linear',strides=1)(input)
    cnn1 = BatchNormalization(axis=-1)(cnn1)
    cnn2 = Conv1D(50,50,padding='same',activation='linear',strides=1)(input)
    cnn2 = BatchNormalization(axis=-1)(cnn2)
    cnn3 = Conv1D(50,100,padding='same',activation='linear',strides=1)(input)
    cnn3 = BatchNormalization(axis=-1)(cnn3)
    out1 = TimeDistributed(Dense(n_output,activation='softmax'))(Concatenate(axis=-1)([cnn1,cnn2,cnn3]))
    
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(Concatenate(axis=-1)([cnn1,cnn2,cnn3]))
    out2 = TimeDistributed(Dense(n_output,activation='softmax'))(out)
    
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(out)
    out3 = TimeDistributed(Dense(n_output,activation='softmax'))(out)
    
    cnn4 = Conv1D(50,10,padding='same',activation='linear',strides=1)(out)
    cnn4 = BatchNormalization(axis=-1)(cnn4)
    cnn5 = Conv1D(50,50,padding='same',activation='linear',strides=1)(out)
    cnn5 = BatchNormalization(axis=-1)(cnn5)
    cnn6 = Conv1D(50,100,padding='same',activation='linear',strides=1)(out)
    cnn6 = BatchNormalization(axis=-1)(cnn6)
    
    out4 = TimeDistributed(Dense(n_output,activation='softmax'))(Concatenate(axis=-1)([cnn4,cnn5,cnn6]))
    
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(Concatenate(axis=-1)([cnn4,cnn5,cnn6]))
    #attn = SeqSelfAttention(units=20,attention_width=20,attention_activation='sigmoid',name='Attention1')(out)
    out = TimeDistributed(Dense(n_output,activation='softmax'))(out)
    model = Model(input,[out1,out2,out3,out4,out])
    model.compile(loss = 'categorical_crossentropy', optimizer="adam",metrics = ['accuracy'])
    #print(model.summary())
    return model

In [32]:
def get_deep_lstm_model():
    input = Input((max_len,2))

    out = LSTM(512, dropout=0.2, recurrent_dropout=0.2, return_sequences = True)(input)
    out = LSTM(256, dropout=0.2, recurrent_dropout=0.2, return_sequences = True)(out)

    cnn4 = Conv1D(100,10,padding='same',strides=1)(out)
    cnn4 = BatchNormalization(axis=-1)(cnn4)
    cnn5 = Conv1D(100,50,padding='same',strides=1)(out)
    cnn5 = BatchNormalization(axis=-1)(cnn5)
    cnn6 = Conv1D(100,100,padding='same',strides=1)(out)
    cnn6 = BatchNormalization(axis=-1)(cnn6)
    
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(Concatenate(axis=-1)([cnn4,cnn5,cnn6,out]))
    out = TimeDistributed(Dense(n_output,activation='softmax'))(out)
    model = Model(input,out)
    model.compile(loss = 'categorical_crossentropy', optimizer="adam",metrics = ['accuracy'])
    #print(model.summary())
    return model

In [33]:
def get_very_deep_lstm_cnn_bn_model():
    input = Input((max_len,2))
    cnn1 = Conv1D(100,10,padding='same',strides=1)(input)
    cnn1 = BatchNormalization(axis=-1)(cnn1)
    cnn2 = Conv1D(100,50,padding='same',strides=1)(input)
    cnn2 = BatchNormalization(axis=-1)(cnn2)
    cnn3 = Conv1D(100,100,padding='same',strides=1)(input)
    cnn3 = BatchNormalization(axis=-1)(cnn3)
    out = Bidirectional(LSTM(256, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(Concatenate(axis=-1)([cnn1,cnn2,cnn3,input]))
    out = BatchNormalization(axis=-1)(out)
    out = Bidirectional(LSTM(256, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(out)
    out = BatchNormalization(axis=-1)(out)
    cnn4 = Conv1D(100,10,padding='same',strides=1)(out)
    cnn4 = BatchNormalization(axis=-1)(cnn4)
    cnn5 = Conv1D(100,50,padding='same',strides=1)(out)
    cnn5 = BatchNormalization(axis=-1)(cnn5)
    cnn6 = Conv1D(100,100,padding='same',strides=1)(out)
    cnn6 = BatchNormalization(axis=-1)(cnn6)
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(Concatenate(axis=-1)([cnn4,cnn5,cnn6,out]))
    out = TimeDistributed(Dense(n_output,activation='softmax'))(out)
    model = Model(input,out)
    model.compile(loss = 'categorical_crossentropy', optimizer="adam",metrics = ['accuracy'])
    #print(model.summary())
    return model

In [47]:
def get_very_deep_lstm_cnn_bn_model_with_attn():
    input = Input((max_len,2))
    
    inter_model = get_very_deep_lstm_cnn_bn_model()
    inter_model.load_weights("../models/weights_very_deep_cnn_lstm_bn_1.hdf5")

    cnn1 = inter_model.layers[1](input)
    cnn1 = inter_model.layers[4](cnn1)
    cnn2 = inter_model.layers[2](input)
    cnn2 = inter_model.layers[5](cnn2)
    cnn3 = inter_model.layers[3](input)
    cnn3 = inter_model.layers[6](cnn3)
    out = inter_model.layers[7]([cnn1,cnn2,cnn3,input])
    out = inter_model.layers[8](out)
    out = inter_model.layers[9](out)
    out = inter_model.layers[10](out)
    out = inter_model.layers[11](out)
    cnn4 = inter_model.layers[12](out)
    cnn4 = inter_model.layers[15](cnn4)
    cnn5 = inter_model.layers[13](out)
    cnn5 = inter_model.layers[16](cnn5)
    cnn6 = inter_model.layers[14](out)
    cnn6 = inter_model.layers[17](cnn6)
    out = inter_model.layers[18]([cnn4,cnn5,cnn6,out])
    out = inter_model.layers[19](out)
    
    out = Bidirectional(LSTM(50, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(out)
    attn = SeqSelfAttention(units=20,attention_width=50,attention_activation='sigmoid',name='Attention1')(out)
    out = TimeDistributed(Dense(n_output,activation='softmax'))(Concatenate(axis=-1)([attn,out]))
    model = Model(input,out)
    
    for layer in model.layers[1:-4]:
        layer.trainable = False
        
    return model

In [35]:
def get_deep_bilstm_model():
    input = Input((max_len,2))

    out = Bidirectional(LSTM(256, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(input)
    out = Bidirectional(LSTM(128, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(out)

    cnn4 = Conv1D(100,10,padding='same',strides=1)(out)
    cnn4 = BatchNormalization(axis=-1)(cnn4)
    cnn5 = Conv1D(100,50,padding='same',strides=1)(out)
    cnn5 = BatchNormalization(axis=-1)(cnn5)
    cnn6 = Conv1D(100,100,padding='same',strides=1)(out)
    cnn6 = BatchNormalization(axis=-1)(cnn6)
    
    out = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))(Concatenate(axis=-1)([cnn4,cnn5,cnn6,out]))
    out = TimeDistributed(Dense(n_output,activation='softmax'))(out)
    model = Model(input,out)
    model.compile(loss = 'categorical_crossentropy', optimizer="adam",metrics = ['accuracy'])
    #print(model.summary())
    return model

In [37]:
test_model = get_deep_lstm_model()
test_model.load_weights("../models/weights_very_deep_lstm2_cnn_lstm_1.hdf5")

In [43]:
test_model = get_very_deep_lstm_cnn_bn_model()
test_model.load_weights("../models/weights_very_deep_cnn_lstm_bn_1.hdf5")

In [48]:
test_model = get_very_deep_lstm_cnn_bn_model_with_attn()
test_model.load_weights("../models/weights_very_deep_cnn_lstm_bn_attn_1.hdf5")

In [39]:
iter = 0

for train_index, test_index in KFold(n_splits=5).split(X,y):
    iter += 1
    
    print ("Iteration {}".format(iter))
    
    train_x = train_data[['processed_GR2','GR_rate']].values.reshape(num_train_well,max_len,2)[train_index]
    train_y = y[train_index]
    val_x = train_data[['processed_GR2','GR_rate']].values.reshape(num_train_well,max_len,2)[test_index]
    val_y = y[test_index]
    
    break

Iteration 1


In [49]:
test_pred = test_model.predict(val_x).argmax(-1)
test_pred_val = test_pred.reshape(test_pred.shape[0]*test_pred.shape[1])

val_actual = val_y.argmax(-1)
val_actual = val_actual.reshape(val_actual.shape[0]*val_actual.shape[1])

In [41]:
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

In [50]:
print (accuracy_score(val_actual,test_pred_val))
print (f1_score(val_actual,test_pred_val,average='macro'))
print (confusion_matrix(val_actual,test_pred_val))

0.9666784090909091
0.9540715828730058
[[454840   1332     17   2781   2187]
 [  1737  95986   1364   1648   1403]
 [    12   1617 100164   1137   1250]
 [  2921   2469    914  97842    594]
 [  2773   1433   1152    582 101845]]


In [166]:
print (accuracy_score(val_actual,test_pred_val))
print (f1_score(val_actual,test_pred_val,average='macro'))
conf = confusion_matrix(val_actual,test_pred_val)
print (conf/conf.sum(axis=1)[:,np.newaxis])

0.9666784090909091
0.9540715828730058
[[9.86301845e-01 2.88838725e-03 3.68638013e-05 6.03048420e-03
  4.74241961e-03]
 [1.70064031e-02 9.39767765e-01 1.33544812e-02 1.61350330e-02
  1.37363175e-02]
 [1.15185256e-04 1.55212133e-02 9.61451334e-01 1.09138030e-02
  1.19984642e-02]
 [2.78881039e-02 2.35726561e-02 8.72637006e-03 9.34141684e-01
  5.67118579e-03]
 [2.57271420e-02 1.32949854e-02 1.06879436e-02 5.39963817e-03
  9.44890291e-01]]


In [51]:
val_data = train_data.iloc[:val_x.shape[0]*val_x.shape[1]]
val_data['true_label'] = val_actual
val_data['pred_label'] = test_pred_val

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [52]:
val_data.head(10)

Unnamed: 0,row_id,well_id,GR,label,processed_GR,processed_GR2,GR_rate,true_label,pred_label
0,0,0,143.51,0,0.816197,1.042664,0.0,0,0
1,1,0,112.790928,0,0.609702,0.066697,-0.272354,0,0
2,2,0,123.531856,0,0.681903,0.407944,0.086949,0,0
3,3,0,111.692784,0,0.60232,0.031808,-0.105997,0,0
4,4,0,123.613712,0,0.682453,0.410545,0.096437,0,0
5,5,0,120.414641,0,0.660949,0.308908,-0.026567,0,0
6,6,0,123.145569,0,0.679306,0.395672,0.022176,0,0
7,7,0,114.216497,0,0.619285,0.111989,-0.078177,0,0
8,8,0,119.387425,0,0.654044,0.276273,0.043312,0,0
9,9,0,132.728353,0,0.743722,0.700124,0.100513,0,0


In [53]:
val_accuracy = val_data.groupby(['well_id'])['label','pred_label'].agg(lambda x: accuracy_score(x['label'],x['pred_label'])).reset_index()
val_accuracy.label.describe()

count    800.000000
mean       0.963043
std        0.021290
min        0.870000
25%        0.950909
50%        0.967273
75%        0.979091
max        0.997273
Name: label, dtype: float64

In [54]:
val_accuracy.sort_values(['label'],ascending=[True]).head(10)

Unnamed: 0,well_id,label,pred_label
142,142,0.87,0.87
600,600,0.885455,0.885455
385,385,0.886364,0.886364
547,547,0.889091,0.889091
195,195,0.895455,0.895455
146,146,0.896364,0.896364
508,508,0.896364,0.896364
598,598,0.897273,0.897273
461,461,0.9,0.9
380,380,0.900909,0.900909


In [59]:
test_pred = test_model.predict(test_data.GR_rate.values.reshape(num_test_well,max_len,1)).argmax(-1)
test_pred = test_pred.reshape(test_pred.shape[0]*test_pred.shape[1])

In [60]:
submission_data.label = test_pred

In [61]:
submission_data.head(10)

Unnamed: 0,unique_id,label
0,CAX_0,3
1,CAX_1,0
2,CAX_2,0
3,CAX_3,0
4,CAX_4,0
5,CAX_5,0
6,CAX_6,0
7,CAX_7,0
8,CAX_8,4
9,CAX_9,4


In [62]:
submission_data.to_csv("../data/submission_model11.csv",index=False)

In [63]:
submission_data_prev_best = pd.read_csv('../data/submission7_cnn_bilstm_attn.csv')

In [64]:
submission_data_prev_best.head(10)

Unnamed: 0,unique_id,label
0,CAX_0,0
1,CAX_1,0
2,CAX_2,0
3,CAX_3,0
4,CAX_4,0
5,CAX_5,0
6,CAX_6,0
7,CAX_7,0
8,CAX_8,0
9,CAX_9,0


In [65]:
submission_data.loc[submission_data.index%1100 == 0,"label"] = submission_data_prev_best.iloc[submission_data_prev_best.index%1100 == 0].label

In [66]:
submission_data.head(10)

Unnamed: 0,unique_id,label
0,CAX_0,0
1,CAX_1,0
2,CAX_2,0
3,CAX_3,0
4,CAX_4,0
5,CAX_5,0
6,CAX_6,0
7,CAX_7,0
8,CAX_8,4
9,CAX_9,4


In [67]:
submission_data.to_csv('../data/submission_model11_ensemble.csv',index=False)

### Ensemble

In [211]:
train_data[['row_id','label']].corr()

Unnamed: 0,row_id,label
row_id,1.0,-0.048928
label,-0.048928,1.0


In [212]:
pd.crosstab(train_data.label,train_data.row_id)

row_id,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,...,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617,618,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,680,681,682,683,684,685,686,687,688,689,690,691,692,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,732,733,734,735,736,737,738,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784,785,786,787,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816,817,818,819,820,821,822,823,824,825,826,827,828,829,830,831,832,833,834,835,836,837,838,839,840,841,842,843,844,845,846,847,848,849,850,851,852,853,854,855,856,857,858,859,860,861,862,863,864,865,866,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,882,883,884,885,886,887,888,889,890,891,892,893,894,895,896,897,898,899,900,901,902,903,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,930,931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,975,976,977,978,979,980,981,982,983,984,985,986,987,988,989,990,991,992,993,994,995,996,997,998,999,1000,1001,1002,1003,1004,1005,1006,1007,1008,1009,1010,1011,1012,1013,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023,1024,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1037,1038,1039,1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1,Unnamed: 460_level_1,Unnamed: 461_level_1,Unnamed: 462_level_1,Unnamed: 463_level_1,Unnamed: 464_level_1,Unnamed: 465_level_1,Unnamed: 466_level_1,Unnamed: 467_level_1,Unnamed: 468_level_1,Unnamed: 469_level_1,Unnamed: 470_level_1,Unnamed: 471_level_1,Unnamed: 472_level_1,Unnamed: 473_level_1,Unnamed: 474_level_1,Unnamed: 475_level_1,Unnamed: 476_level_1,Unnamed: 477_level_1,Unnamed: 478_level_1,Unnamed: 479_level_1,Unnamed: 480_level_1,Unnamed: 481_level_1,Unnamed: 482_level_1,Unnamed: 483_level_1,Unnamed: 484_level_1,Unnamed: 485_level_1,Unnamed: 486_level_1,Unnamed: 487_level_1,Unnamed: 488_level_1,Unnamed: 489_level_1,Unnamed: 490_level_1,Unnamed: 491_level_1,Unnamed: 492_level_1,Unnamed: 493_level_1,Unnamed: 494_level_1,Unnamed: 495_level_1,Unnamed: 496_level_1,Unnamed: 497_level_1,Unnamed: 498_level_1,Unnamed: 499_level_1,Unnamed: 500_level_1,Unnamed: 501_level_1,Unnamed: 502_level_1,Unnamed: 503_level_1,Unnamed: 504_level_1,Unnamed: 505_level_1,Unnamed: 506_level_1,Unnamed: 507_level_1,Unnamed: 508_level_1,Unnamed: 509_level_1,Unnamed: 510_level_1,Unnamed: 511_level_1,Unnamed: 512_level_1,Unnamed: 513_level_1,Unnamed: 514_level_1,Unnamed: 515_level_1,Unnamed: 516_level_1,Unnamed: 517_level_1,Unnamed: 518_level_1,Unnamed: 519_level_1,Unnamed: 520_level_1,Unnamed: 521_level_1,Unnamed: 522_level_1,Unnamed: 523_level_1,Unnamed: 524_level_1,Unnamed: 525_level_1,Unnamed: 526_level_1,Unnamed: 527_level_1,Unnamed: 528_level_1,Unnamed: 529_level_1,Unnamed: 530_level_1,Unnamed: 531_level_1,Unnamed: 532_level_1,Unnamed: 533_level_1,Unnamed: 534_level_1,Unnamed: 535_level_1,Unnamed: 536_level_1,Unnamed: 537_level_1,Unnamed: 538_level_1,Unnamed: 539_level_1,Unnamed: 540_level_1,Unnamed: 541_level_1,Unnamed: 542_level_1,Unnamed: 543_level_1,Unnamed: 544_level_1,Unnamed: 545_level_1,Unnamed: 546_level_1,Unnamed: 547_level_1,Unnamed: 548_level_1,Unnamed: 549_level_1,Unnamed: 550_level_1,Unnamed: 551_level_1,Unnamed: 552_level_1,Unnamed: 553_level_1,Unnamed: 554_level_1,Unnamed: 555_level_1,Unnamed: 556_level_1,Unnamed: 557_level_1,Unnamed: 558_level_1,Unnamed: 559_level_1,Unnamed: 560_level_1,Unnamed: 561_level_1,Unnamed: 562_level_1,Unnamed: 563_level_1,Unnamed: 564_level_1,Unnamed: 565_level_1,Unnamed: 566_level_1,Unnamed: 567_level_1,Unnamed: 568_level_1,Unnamed: 569_level_1,Unnamed: 570_level_1,Unnamed: 571_level_1,Unnamed: 572_level_1,Unnamed: 573_level_1,Unnamed: 574_level_1,Unnamed: 575_level_1,Unnamed: 576_level_1,Unnamed: 577_level_1,Unnamed: 578_level_1,Unnamed: 579_level_1,Unnamed: 580_level_1,Unnamed: 581_level_1,Unnamed: 582_level_1,Unnamed: 583_level_1,Unnamed: 584_level_1,Unnamed: 585_level_1,Unnamed: 586_level_1,Unnamed: 587_level_1,Unnamed: 588_level_1,Unnamed: 589_level_1,Unnamed: 590_level_1,Unnamed: 591_level_1,Unnamed: 592_level_1,Unnamed: 593_level_1,Unnamed: 594_level_1,Unnamed: 595_level_1,Unnamed: 596_level_1,Unnamed: 597_level_1,Unnamed: 598_level_1,Unnamed: 599_level_1,Unnamed: 600_level_1,Unnamed: 601_level_1,Unnamed: 602_level_1,Unnamed: 603_level_1,Unnamed: 604_level_1,Unnamed: 605_level_1,Unnamed: 606_level_1,Unnamed: 607_level_1,Unnamed: 608_level_1,Unnamed: 609_level_1,Unnamed: 610_level_1,Unnamed: 611_level_1,Unnamed: 612_level_1,Unnamed: 613_level_1,Unnamed: 614_level_1,Unnamed: 615_level_1,Unnamed: 616_level_1,Unnamed: 617_level_1,Unnamed: 618_level_1,Unnamed: 619_level_1,Unnamed: 620_level_1,Unnamed: 621_level_1,Unnamed: 622_level_1,Unnamed: 623_level_1,Unnamed: 624_level_1,Unnamed: 625_level_1,Unnamed: 626_level_1,Unnamed: 627_level_1,Unnamed: 628_level_1,Unnamed: 629_level_1,Unnamed: 630_level_1,Unnamed: 631_level_1,Unnamed: 632_level_1,Unnamed: 633_level_1,Unnamed: 634_level_1,Unnamed: 635_level_1,Unnamed: 636_level_1,Unnamed: 637_level_1,Unnamed: 638_level_1,Unnamed: 639_level_1,Unnamed: 640_level_1,Unnamed: 641_level_1,Unnamed: 642_level_1,Unnamed: 643_level_1,Unnamed: 644_level_1,Unnamed: 645_level_1,Unnamed: 646_level_1,Unnamed: 647_level_1,Unnamed: 648_level_1,Unnamed: 649_level_1,Unnamed: 650_level_1,Unnamed: 651_level_1,Unnamed: 652_level_1,Unnamed: 653_level_1,Unnamed: 654_level_1,Unnamed: 655_level_1,Unnamed: 656_level_1,Unnamed: 657_level_1,Unnamed: 658_level_1,Unnamed: 659_level_1,Unnamed: 660_level_1,Unnamed: 661_level_1,Unnamed: 662_level_1,Unnamed: 663_level_1,Unnamed: 664_level_1,Unnamed: 665_level_1,Unnamed: 666_level_1,Unnamed: 667_level_1,Unnamed: 668_level_1,Unnamed: 669_level_1,Unnamed: 670_level_1,Unnamed: 671_level_1,Unnamed: 672_level_1,Unnamed: 673_level_1,Unnamed: 674_level_1,Unnamed: 675_level_1,Unnamed: 676_level_1,Unnamed: 677_level_1,Unnamed: 678_level_1,Unnamed: 679_level_1,Unnamed: 680_level_1,Unnamed: 681_level_1,Unnamed: 682_level_1,Unnamed: 683_level_1,Unnamed: 684_level_1,Unnamed: 685_level_1,Unnamed: 686_level_1,Unnamed: 687_level_1,Unnamed: 688_level_1,Unnamed: 689_level_1,Unnamed: 690_level_1,Unnamed: 691_level_1,Unnamed: 692_level_1,Unnamed: 693_level_1,Unnamed: 694_level_1,Unnamed: 695_level_1,Unnamed: 696_level_1,Unnamed: 697_level_1,Unnamed: 698_level_1,Unnamed: 699_level_1,Unnamed: 700_level_1,Unnamed: 701_level_1,Unnamed: 702_level_1,Unnamed: 703_level_1,Unnamed: 704_level_1,Unnamed: 705_level_1,Unnamed: 706_level_1,Unnamed: 707_level_1,Unnamed: 708_level_1,Unnamed: 709_level_1,Unnamed: 710_level_1,Unnamed: 711_level_1,Unnamed: 712_level_1,Unnamed: 713_level_1,Unnamed: 714_level_1,Unnamed: 715_level_1,Unnamed: 716_level_1,Unnamed: 717_level_1,Unnamed: 718_level_1,Unnamed: 719_level_1,Unnamed: 720_level_1,Unnamed: 721_level_1,Unnamed: 722_level_1,Unnamed: 723_level_1,Unnamed: 724_level_1,Unnamed: 725_level_1,Unnamed: 726_level_1,Unnamed: 727_level_1,Unnamed: 728_level_1,Unnamed: 729_level_1,Unnamed: 730_level_1,Unnamed: 731_level_1,Unnamed: 732_level_1,Unnamed: 733_level_1,Unnamed: 734_level_1,Unnamed: 735_level_1,Unnamed: 736_level_1,Unnamed: 737_level_1,Unnamed: 738_level_1,Unnamed: 739_level_1,Unnamed: 740_level_1,Unnamed: 741_level_1,Unnamed: 742_level_1,Unnamed: 743_level_1,Unnamed: 744_level_1,Unnamed: 745_level_1,Unnamed: 746_level_1,Unnamed: 747_level_1,Unnamed: 748_level_1,Unnamed: 749_level_1,Unnamed: 750_level_1,Unnamed: 751_level_1,Unnamed: 752_level_1,Unnamed: 753_level_1,Unnamed: 754_level_1,Unnamed: 755_level_1,Unnamed: 756_level_1,Unnamed: 757_level_1,Unnamed: 758_level_1,Unnamed: 759_level_1,Unnamed: 760_level_1,Unnamed: 761_level_1,Unnamed: 762_level_1,Unnamed: 763_level_1,Unnamed: 764_level_1,Unnamed: 765_level_1,Unnamed: 766_level_1,Unnamed: 767_level_1,Unnamed: 768_level_1,Unnamed: 769_level_1,Unnamed: 770_level_1,Unnamed: 771_level_1,Unnamed: 772_level_1,Unnamed: 773_level_1,Unnamed: 774_level_1,Unnamed: 775_level_1,Unnamed: 776_level_1,Unnamed: 777_level_1,Unnamed: 778_level_1,Unnamed: 779_level_1,Unnamed: 780_level_1,Unnamed: 781_level_1,Unnamed: 782_level_1,Unnamed: 783_level_1,Unnamed: 784_level_1,Unnamed: 785_level_1,Unnamed: 786_level_1,Unnamed: 787_level_1,Unnamed: 788_level_1,Unnamed: 789_level_1,Unnamed: 790_level_1,Unnamed: 791_level_1,Unnamed: 792_level_1,Unnamed: 793_level_1,Unnamed: 794_level_1,Unnamed: 795_level_1,Unnamed: 796_level_1,Unnamed: 797_level_1,Unnamed: 798_level_1,Unnamed: 799_level_1,Unnamed: 800_level_1,Unnamed: 801_level_1,Unnamed: 802_level_1,Unnamed: 803_level_1,Unnamed: 804_level_1,Unnamed: 805_level_1,Unnamed: 806_level_1,Unnamed: 807_level_1,Unnamed: 808_level_1,Unnamed: 809_level_1,Unnamed: 810_level_1,Unnamed: 811_level_1,Unnamed: 812_level_1,Unnamed: 813_level_1,Unnamed: 814_level_1,Unnamed: 815_level_1,Unnamed: 816_level_1,Unnamed: 817_level_1,Unnamed: 818_level_1,Unnamed: 819_level_1,Unnamed: 820_level_1,Unnamed: 821_level_1,Unnamed: 822_level_1,Unnamed: 823_level_1,Unnamed: 824_level_1,Unnamed: 825_level_1,Unnamed: 826_level_1,Unnamed: 827_level_1,Unnamed: 828_level_1,Unnamed: 829_level_1,Unnamed: 830_level_1,Unnamed: 831_level_1,Unnamed: 832_level_1,Unnamed: 833_level_1,Unnamed: 834_level_1,Unnamed: 835_level_1,Unnamed: 836_level_1,Unnamed: 837_level_1,Unnamed: 838_level_1,Unnamed: 839_level_1,Unnamed: 840_level_1,Unnamed: 841_level_1,Unnamed: 842_level_1,Unnamed: 843_level_1,Unnamed: 844_level_1,Unnamed: 845_level_1,Unnamed: 846_level_1,Unnamed: 847_level_1,Unnamed: 848_level_1,Unnamed: 849_level_1,Unnamed: 850_level_1,Unnamed: 851_level_1,Unnamed: 852_level_1,Unnamed: 853_level_1,Unnamed: 854_level_1,Unnamed: 855_level_1,Unnamed: 856_level_1,Unnamed: 857_level_1,Unnamed: 858_level_1,Unnamed: 859_level_1,Unnamed: 860_level_1,Unnamed: 861_level_1,Unnamed: 862_level_1,Unnamed: 863_level_1,Unnamed: 864_level_1,Unnamed: 865_level_1,Unnamed: 866_level_1,Unnamed: 867_level_1,Unnamed: 868_level_1,Unnamed: 869_level_1,Unnamed: 870_level_1,Unnamed: 871_level_1,Unnamed: 872_level_1,Unnamed: 873_level_1,Unnamed: 874_level_1,Unnamed: 875_level_1,Unnamed: 876_level_1,Unnamed: 877_level_1,Unnamed: 878_level_1,Unnamed: 879_level_1,Unnamed: 880_level_1,Unnamed: 881_level_1,Unnamed: 882_level_1,Unnamed: 883_level_1,Unnamed: 884_level_1,Unnamed: 885_level_1,Unnamed: 886_level_1,Unnamed: 887_level_1,Unnamed: 888_level_1,Unnamed: 889_level_1,Unnamed: 890_level_1,Unnamed: 891_level_1,Unnamed: 892_level_1,Unnamed: 893_level_1,Unnamed: 894_level_1,Unnamed: 895_level_1,Unnamed: 896_level_1,Unnamed: 897_level_1,Unnamed: 898_level_1,Unnamed: 899_level_1,Unnamed: 900_level_1,Unnamed: 901_level_1,Unnamed: 902_level_1,Unnamed: 903_level_1,Unnamed: 904_level_1,Unnamed: 905_level_1,Unnamed: 906_level_1,Unnamed: 907_level_1,Unnamed: 908_level_1,Unnamed: 909_level_1,Unnamed: 910_level_1,Unnamed: 911_level_1,Unnamed: 912_level_1,Unnamed: 913_level_1,Unnamed: 914_level_1,Unnamed: 915_level_1,Unnamed: 916_level_1,Unnamed: 917_level_1,Unnamed: 918_level_1,Unnamed: 919_level_1,Unnamed: 920_level_1,Unnamed: 921_level_1,Unnamed: 922_level_1,Unnamed: 923_level_1,Unnamed: 924_level_1,Unnamed: 925_level_1,Unnamed: 926_level_1,Unnamed: 927_level_1,Unnamed: 928_level_1,Unnamed: 929_level_1,Unnamed: 930_level_1,Unnamed: 931_level_1,Unnamed: 932_level_1,Unnamed: 933_level_1,Unnamed: 934_level_1,Unnamed: 935_level_1,Unnamed: 936_level_1,Unnamed: 937_level_1,Unnamed: 938_level_1,Unnamed: 939_level_1,Unnamed: 940_level_1,Unnamed: 941_level_1,Unnamed: 942_level_1,Unnamed: 943_level_1,Unnamed: 944_level_1,Unnamed: 945_level_1,Unnamed: 946_level_1,Unnamed: 947_level_1,Unnamed: 948_level_1,Unnamed: 949_level_1,Unnamed: 950_level_1,Unnamed: 951_level_1,Unnamed: 952_level_1,Unnamed: 953_level_1,Unnamed: 954_level_1,Unnamed: 955_level_1,Unnamed: 956_level_1,Unnamed: 957_level_1,Unnamed: 958_level_1,Unnamed: 959_level_1,Unnamed: 960_level_1,Unnamed: 961_level_1,Unnamed: 962_level_1,Unnamed: 963_level_1,Unnamed: 964_level_1,Unnamed: 965_level_1,Unnamed: 966_level_1,Unnamed: 967_level_1,Unnamed: 968_level_1,Unnamed: 969_level_1,Unnamed: 970_level_1,Unnamed: 971_level_1,Unnamed: 972_level_1,Unnamed: 973_level_1,Unnamed: 974_level_1,Unnamed: 975_level_1,Unnamed: 976_level_1,Unnamed: 977_level_1,Unnamed: 978_level_1,Unnamed: 979_level_1,Unnamed: 980_level_1,Unnamed: 981_level_1,Unnamed: 982_level_1,Unnamed: 983_level_1,Unnamed: 984_level_1,Unnamed: 985_level_1,Unnamed: 986_level_1,Unnamed: 987_level_1,Unnamed: 988_level_1,Unnamed: 989_level_1,Unnamed: 990_level_1,Unnamed: 991_level_1,Unnamed: 992_level_1,Unnamed: 993_level_1,Unnamed: 994_level_1,Unnamed: 995_level_1,Unnamed: 996_level_1,Unnamed: 997_level_1,Unnamed: 998_level_1,Unnamed: 999_level_1,Unnamed: 1000_level_1,Unnamed: 1001_level_1
0,3949,3904,3847,3787,3732,3670,3621,3569,3523,3477,3420,3374,3338,3296,3255,3205,3165,3120,3071,3031,2996,2951,2915,2877,2837,2805,2777,2748,2716,2680,2652,2627,2614,2596,2562,2532,2504,2480,2461,2451,2427,2402,2371,2365,2351,2323,2295,2270,2251,2246,2231,2223,2216,2205,2199,2177,2151,2141,2126,2102,2102,2077,2079,2067,2066,2055,2048,2051,2028,2021,2023,2004,1997,1987,1978,1978,1970,1972,1970,1955,1942,1937,1943,1945,1933,1936,1928,1929,1919,1919,1914,1916,1913,1911,1914,1907,1902,1897,1899,1899,1899,1901,1892,1896,1892,1886,1882,1880,1881,1886,1902,1915,1917,1911,1909,1898,1901,1903,1913,1924,1926,1923,1919,1927,1929,1929,1915,1924,1919,1922,1919,1913,1921,1923,1917,1915,1924,1929,1938,1923,1930,1941,1933,1941,1943,1947,1950,1940,1958,1951,1950,1952,1952,1954,1943,1943,1957,1967,1961,1943,1944,1955,1947,1951,1960,1950,1958,1961,1962,1964,1965,1966,1950,1957,1967,1977,1982,1977,1981,1979,1973,1965,1955,1957,1953,1935,1939,1945,1925,1928,1913,1912,1927,1936,1929,1932,1933,1926,1939,1935,1938,1951,1952,1966,1965,1958,1960,1970,1970,1971,1958,1958,1945,1934,1938,1931,1925,1919,1918,1911,1926,1918,1926,1926,1925,1934,1923,1918,1918,1915,1921,1921,1923,1914,1906,1891,1880,1888,1882,1886,1886,1879,1883,1890,1877,1890,1904,1906,1902,1901,1896,1891,1877,1871,1886,1888,1879,1887,1890,1891,1889,1882,1880,1878,1875,1873,1869,1872,1880,1880,1880,1893,1897,1907,1905,1906,1909,1916,1908,1888,1886,1890,1895,1915,1915,1912,1920,1923,1921,1920,1918,1918,1931,1923,1922,1917,1915,1910,1914,1922,1938,1946,1947,1947,1940,1947,1949,1939,1928,1931,1928,1929,1938,1940,1939,1933,1939,1945,1951,1954,1948,1946,1962,1979,1979,1982,1981,1989,1999,1998,1998,2005,2006,2004,2004,2017,2018,2014,2010,2013,2009,1983,1970,1967,1967,1964,1954,1953,1955,1957,1958,1967,1960,1941,1943,1940,1950,1956,1955,1945,1936,1921,1913,1915,1904,1906,1906,1914,1911,1918,1914,1912,1912,1923,1922,1912,1913,1908,1908,1898,1905,1900,1896,1893,1911,1915,1912,1910,1917,1921,1912,1913,1917,1917,1934,1947,1957,1961,1967,1969,1976,1974,1971,1967,1967,1959,1952,1961,1963,1959,1955,1960,1965,1965,1957,1946,1944,1941,1942,1957,1949,1952,1952,1945,1940,1939,1942,1953,1959,1946,1945,1940,1924,1926,1923,1921,1903,1894,1885,1885,1892,1888,1891,1908,1911,1907,1917,1911,1911,1909,1902,1912,1923,1913,1920,1917,1915,1931,1943,1941,1950,1943,1946,1947,1936,1931,1946,1963,1960,1958,1966,1967,1967,1970,1978,1970,1976,1981,1987,1994,1991,1986,1975,1972,1971,1968,1975,1971,1976,1955,1935,1936,1928,1931,1953,1953,1937,1941,1954,1973,...,1905,1907,1920,1926,1936,1926,1920,1931,1929,1919,1924,1926,1918,1920,1915,1907,1896,1896,1884,1885,1879,1886,1891,1896,1884,1899,1913,1917,1903,1892,1885,1889,1890,1886,1885,1875,1884,1880,1896,1890,1892,1886,1899,1912,1916,1916,1917,1934,1937,1940,1940,1920,1927,1919,1924,1923,1920,1915,1896,1891,1887,1893,1899,1903,1899,1893,1905,1907,1912,1916,1928,1934,1929,1941,1935,1941,1933,1934,1936,1927,1933,1932,1928,1935,1935,1946,1953,1947,1952,1967,1960,1954,1950,1948,1955,1959,1961,1957,1949,1949,1950,1950,1942,1946,1944,1942,1950,1954,1950,1954,1947,1944,1950,1950,1941,1939,1944,1944,1952,1943,1938,1932,1943,1938,1942,1957,1947,1950,1955,1962,1963,1961,1956,1952,1952,1948,1933,1929,1929,1923,1926,1924,1920,1926,1914,1915,1903,1905,1918,1909,1919,1916,1917,1913,1915,1913,1912,1913,1916,1913,1909,1908,1912,1916,1927,1927,1928,1944,1945,1959,1960,1964,1965,1961,1963,1963,1960,1957,1953,1953,1954,1956,1954,1952,1956,1953,1950,1952,1953,1942,1954,1950,1954,1950,1944,1930,1926,1923,1912,1921,1911,1907,1909,1907,1917,1919,1912,1914,1920,1920,1909,1907,1900,1898,1900,1888,1890,1895,1880,1887,1894,1881,1885,1877,1878,1883,1884,1876,1876,1879,1882,1896,1898,1916,1912,1909,1925,1925,1928,1929,1928,1931,1931,1927,1940,1910,1899,1901,1911,1911,1905,1894,1899,1899,1899,1887,1887,1888,1881,1893,1899,1894,1888,1884,1881,1883,1887,1884,1869,1873,1883,1883,1889,1890,1891,1893,1905,1893,1908,1914,1911,1900,1889,1897,1891,1891,1898,1905,1915,1919,1919,1917,1935,1937,1935,1933,1930,1930,1941,1948,1939,1934,1923,1928,1917,1918,1920,1915,1919,1932,1923,1930,1932,1920,1931,1923,1932,1933,1933,1934,1941,1935,1940,1941,1947,1956,1952,1959,1951,1950,1937,1930,1933,1941,1929,1920,1929,1929,1916,1913,1898,1903,1901,1906,1909,1912,1898,1900,1893,1899,1907,1904,1907,1902,1899,1894,1903,1909,1909,1914,1915,1913,1906,1909,1915,1924,1922,1939,1950,1947,1952,1949,1951,1949,1953,1951,1965,1957,1973,1975,1957,1953,1949,1950,1932,1934,1941,1930,1915,1930,1928,1927,1925,1921,1932,1942,1921,1922,1928,1955,1987,2019,2057,2090,2118,2145,2174,2214,2254,2284,2313,2350,2383,2429,2460,2486,2518,2552,2585,2615,2650,2684,2721,2757,2782,2817,2847,2877,2908,2944,2980,3008,3044,3081,3107,3130,3151,3190,3218,3246,3274,3297,3320,3345,3368,3386,3421,3450,3473,3488,3523,3548,3576,3603,3625,3649,3663,3681,3698,3714,3726,3751,3770,3789,3805,3822,3834,3849,3864,3880,3892,3905,3910,3919,3934,3941,3949,3961,3968,3973,3981,3983,3990,3993,3996,3998,3999,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000,4000
1,9,19,30,41,56,75,83,92,100,109,128,140,145,155,169,178,191,200,216,226,231,243,256,260,262,266,275,286,291,299,308,314,315,319,320,324,333,338,347,353,360,371,382,379,383,385,392,400,406,406,401,400,401,408,412,422,434,433,440,450,447,451,452,466,471,466,470,474,475,476,479,488,498,502,508,511,510,515,526,528,535,532,539,534,547,543,549,542,541,548,550,551,551,541,537,537,542,549,550,546,537,524,532,535,528,528,532,527,519,517,509,504,504,510,514,514,517,519,519,522,526,525,531,527,526,523,520,521,519,518,511,512,507,506,511,513,515,519,519,521,521,520,519,517,519,517,516,515,518,526,529,529,524,519,523,520,511,510,505,501,492,493,500,499,490,495,487,474,471,477,471,475,476,469,468,472,471,478,475,476,479,481,489,484,480,478,474,470,485,485,489,491,486,481,489,485,482,489,491,492,490,483,480,482,480,485,486,490,492,490,493,493,497,496,494,498,504,508,508,505,498,499,496,505,499,490,499,497,502,505,501,503,499,499,501,507,516,517,523,521,525,535,537,535,543,547,539,545,536,537,534,537,535,544,536,533,535,536,538,538,542,548,547,557,558,560,567,569,568,570,575,569,563,557,554,558,557,555,554,560,565,560,558,554,557,555,544,549,557,558,557,563,564,573,570,565,565,563,557,550,551,553,553,555,563,552,547,549,552,560,568,572,575,569,566,569,574,573,566,566,568,564,564,549,539,530,529,523,510,514,508,502,504,505,502,497,501,505,512,511,515,523,524,524,520,513,518,515,516,522,522,525,526,537,545,548,543,538,532,533,529,530,532,540,537,540,544,544,545,547,546,546,543,544,545,546,541,544,541,537,534,538,540,538,534,539,548,542,534,533,537,540,544,545,540,534,529,526,523,526,516,521,518,520,522,520,522,532,529,535,535,542,539,538,532,535,526,527,519,511,517,518,516,511,511,510,511,511,511,507,509,516,513,517,517,518,525,524,528,531,531,533,541,534,537,534,532,529,535,527,528,525,524,524,516,521,527,528,529,526,522,524,523,522,525,524,530,519,514,515,514,507,507,507,505,509,505,501,498,500,496,502,502,513,512,513,502,505,500,496,501,499,501,505,498,493,501,503,499,492,...,511,511,515,508,504,515,519,513,511,510,507,501,500,504,506,508,510,511,518,529,519,515,520,520,517,519,517,514,517,520,519,518,517,516,511,512,510,515,514,521,522,525,524,522,528,527,523,521,524,531,529,539,533,539,538,544,547,552,561,551,553,549,557,557,554,554,552,547,545,541,539,540,537,531,537,539,535,528,534,538,534,540,542,536,535,529,529,533,531,532,531,518,518,523,515,518,518,517,517,507,508,509,504,516,515,521,518,521,517,515,516,517,512,510,512,514,511,512,507,512,508,509,517,516,517,512,513,511,513,516,513,512,515,521,517,521,521,523,517,522,529,524,522,519,525,536,538,528,528,533,538,534,531,529,528,527,529,527,524,511,514,514,517,530,530,529,531,528,529,526,526,522,516,516,520,524,522,518,513,506,513,513,517,511,508,514,511,514,515,518,513,519,517,525,519,527,524,518,519,513,517,516,520,519,509,500,506,503,498,507,505,499,499,504,505,502,497,494,498,492,490,497,495,495,493,493,489,491,490,488,485,481,479,483,494,511,507,502,497,497,497,493,494,497,496,498,497,492,494,500,506,508,518,524,522,531,532,534,534,534,530,535,531,539,542,540,545,556,550,543,541,547,548,543,542,536,534,542,542,538,548,549,550,542,547,550,552,552,542,536,531,531,516,504,510,514,514,508,505,504,495,498,505,509,517,520,523,528,528,520,518,521,530,538,528,528,530,533,532,526,528,528,523,524,514,514,513,501,501,503,503,507,514,514,525,533,524,531,533,534,545,540,545,532,530,537,545,539,541,537,527,527,523,527,518,518,509,512,512,513,518,523,521,515,516,511,510,505,502,504,500,495,490,490,491,492,489,489,486,493,493,494,488,489,491,488,491,496,496,496,501,498,500,501,494,495,509,503,495,491,482,475,461,455,449,444,436,425,419,416,409,393,388,375,370,364,363,360,355,345,337,329,322,311,304,294,286,279,266,260,248,238,227,218,208,196,190,186,177,171,164,156,153,149,142,139,132,124,118,115,110,107,100,92,85,82,81,80,76,71,70,62,58,50,47,42,39,36,32,27,27,24,23,20,16,14,12,11,9,7,3,3,3,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,16,27,40,57,70,91,104,121,138,153,165,176,187,192,208,217,224,230,239,254,264,272,282,292,304,311,318,318,330,333,340,347,348,350,356,368,375,385,383,383,392,400,404,414,417,433,445,444,452,449,456,454,457,460,462,467,475,478,482,486,496,501,488,481,482,492,496,498,508,502,496,498,498,498,504,500,503,504,507,513,517,519,516,521,524,523,523,533,543,537,538,535,538,551,553,556,556,556,559,566,566,569,573,571,574,572,572,567,563,557,554,551,547,551,553,556,555,551,547,545,539,538,535,537,531,526,533,534,531,527,529,527,519,512,512,512,511,506,508,515,514,513,518,518,522,518,519,521,508,507,510,512,514,509,517,513,507,500,505,517,523,527,525,520,518,515,517,522,526,526,522,520,518,525,520,516,511,515,519,518,509,508,516,515,515,521,520,523,521,520,530,529,528,526,524,524,526,527,523,527,525,524,515,509,507,513,519,516,515,519,528,538,542,540,545,552,553,547,538,549,554,559,560,553,553,556,560,562,561,555,549,554,559,567,567,572,572,574,574,570,557,559,561,560,562,558,549,542,545,533,535,539,549,547,546,545,550,542,543,532,541,541,553,550,558,563,558,556,551,547,545,540,538,536,531,529,531,521,523,526,537,525,519,522,523,524,519,517,521,519,526,532,526,517,518,527,532,540,542,543,540,536,539,534,537,536,544,549,553,546,540,539,539,535,534,532,537,541,541,533,534,536,523,517,519,532,538,535,540,541,557,553,557,556,554,553,548,549,542,542,538,539,542,541,543,543,544,548,551,548,552,550,541,546,541,539,535,536,541,536,542,545,553,549,553,543,545,549,550,545,547,545,549,545,543,542,540,539,538,546,541,539,546,558,558,557,552,553,559,557,557,555,550,541,530,522,521,507,509,511,512,508,504,507,505,506,505,503,507,498,503,502,507,517,516,518,522,523,524,523,516,510,511,518,515,514,515,514,512,516,518,518,532,531,532,532,536,538,540,536,539,541,532,520,517,527,523,527,527,528,529,531,523,528,533,525,524,524,526,524,515,520,519,511,514,511,503,493,502,505,499,491,488,493,488,491,483,482,477,469,471,470,480,479,483,484,485,484,487,495,504,509,511,507,501,501,502,498,495,495,...,533,533,523,531,534,537,538,541,532,531,528,528,532,534,540,540,542,541,546,541,539,542,535,527,533,521,515,512,510,508,511,514,519,519,520,524,528,526,519,518,516,520,518,514,507,511,518,513,513,506,504,510,505,502,495,496,490,486,490,496,500,501,491,492,500,499,498,505,510,511,517,516,513,520,518,517,514,512,507,511,510,495,493,487,492,485,480,483,489,487,490,494,491,491,491,490,487,490,484,488,490,493,497,494,491,491,495,495,498,495,497,499,494,496,494,492,490,490,491,490,492,498,494,489,487,488,494,494,494,488,490,494,495,497,500,500,506,509,508,506,497,497,498,509,514,513,516,520,514,518,511,516,516,519,520,520,519,529,534,545,547,544,531,528,528,519,519,522,524,518,514,512,517,511,510,511,504,509,518,513,513,514,503,506,505,499,498,499,497,510,506,502,508,493,496,497,504,504,498,498,499,501,501,492,492,499,502,502,506,501,502,504,510,501,507,505,507,505,510,513,523,524,522,523,525,521,527,526,525,521,523,519,518,509,507,513,516,524,527,529,526,522,520,522,517,517,524,529,529,526,524,526,518,517,512,514,512,509,516,511,504,499,494,496,496,489,485,485,488,490,491,492,486,484,486,482,481,480,478,478,479,484,490,500,504,504,495,491,493,493,496,494,489,491,490,495,496,502,500,499,501,499,502,503,505,503,504,503,507,509,517,514,509,509,514,514,515,515,511,505,505,517,522,521,520,517,517,521,516,515,519,523,516,516,520,522,520,514,517,525,533,534,530,529,530,524,531,530,526,528,539,537,540,549,558,555,553,551,550,550,556,558,562,564,560,561,567,559,548,545,548,547,541,549,547,555,543,543,531,526,526,533,536,531,536,532,528,535,543,539,537,538,530,536,535,526,520,528,528,520,512,500,493,487,479,470,459,453,446,440,435,425,409,398,385,380,367,359,348,343,333,327,316,309,306,304,296,288,283,276,266,259,247,239,233,228,230,220,210,205,198,192,186,180,175,173,167,158,149,145,137,127,120,113,108,98,91,86,81,80,74,65,57,55,51,46,44,41,36,31,27,22,22,19,16,14,13,10,8,7,6,5,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,14,28,46,62,80,91,106,119,131,142,161,169,180,194,201,217,231,247,256,261,267,281,287,300,309,323,333,340,354,370,375,379,377,377,395,398,405,409,418,424,427,428,433,434,438,443,447,455,455,466,477,479,482,480,484,487,493,495,497,503,501,514,521,520,523,526,521,520,524,536,539,541,535,536,531,529,533,527,520,524,526,536,535,529,522,520,522,520,518,510,509,507,504,509,507,501,503,502,503,501,502,503,502,500,510,513,511,517,530,526,521,521,521,516,518,525,523,517,516,508,506,516,525,519,518,523,529,526,529,533,534,537,538,532,534,525,522,527,519,518,523,521,517,516,519,513,510,514,503,508,509,510,513,520,521,521,521,516,518,519,516,504,506,509,512,516,517,518,520,516,520,514,518,521,527,524,526,522,518,518,524,528,531,537,542,547,547,547,550,550,550,548,540,542,543,540,537,537,534,531,536,534,542,539,546,548,548,545,542,537,540,530,534,539,539,544,540,545,539,536,530,524,521,517,520,517,512,515,512,518,519,511,507,510,510,514,514,514,518,518,520,518,514,512,516,508,509,506,511,518,518,517,517,518,515,515,516,519,515,524,512,507,496,491,490,487,490,492,494,495,492,488,495,495,504,501,503,508,510,517,506,515,514,503,499,504,509,504,500,502,503,496,488,494,494,491,494,494,489,487,479,482,491,489,482,481,477,477,489,480,482,477,474,473,470,473,463,465,466,464,466,468,465,467,469,464,470,477,477,474,470,472,463,459,467,465,469,467,469,474,475,480,481,479,482,492,492,496,489,487,484,477,481,487,479,486,494,488,496,501,498,502,499,495,496,497,493,492,496,498,505,506,503,504,503,505,511,516,513,522,520,522,515,512,499,493,492,496,496,500,508,504,501,503,504,501,500,505,510,508,513,508,516,517,512,514,514,506,504,501,498,493,489,478,486,488,493,492,493,489,493,494,497,497,499,500,502,493,492,500,501,502,507,509,516,512,523,525,529,523,519,518,521,521,520,514,512,511,505,514,518,509,507,509,510,519,521,509,499,500,501,504,500,501,507,511,501,508,506,504,499,507,516,512,513,513,518,516,514,519,523,526,525,525,522,522,519,524,523,526,531,533,541,538,534,532,532,536,534,521,...,517,514,518,515,514,511,513,517,523,530,534,535,537,534,541,541,543,543,543,538,545,542,548,550,555,551,545,544,547,555,552,552,550,547,550,546,538,536,532,535,534,532,532,534,530,526,525,520,515,515,517,521,522,519,515,513,518,511,512,514,511,508,514,509,507,508,504,509,508,508,499,494,493,481,481,482,488,493,496,503,504,515,514,512,516,517,517,511,510,506,502,514,516,517,521,519,521,519,527,522,523,525,528,524,533,534,535,537,535,539,540,538,540,538,546,545,547,551,548,553,554,551,547,549,540,532,538,538,533,532,528,525,528,522,525,522,527,527,528,530,535,534,535,525,521,515,518,521,527,520,517,520,520,521,521,524,525,518,515,519,519,528,536,530,525,534,536,535,534,527,529,533,534,539,531,529,534,541,545,557,551,551,557,560,556,554,557,550,550,543,539,539,535,537,544,543,538,543,550,544,543,544,539,543,543,544,551,545,549,546,556,557,555,556,552,557,558,552,546,548,546,556,554,562,562,564,559,560,560,565,564,558,553,550,548,533,524,525,527,526,524,525,521,530,526,538,543,539,531,522,520,521,518,513,516,519,518,520,515,510,508,513,517,524,519,523,522,516,531,533,528,524,522,524,521,524,518,520,512,510,506,507,510,517,517,516,522,527,525,527,532,533,539,544,537,529,529,531,528,523,534,535,529,526,532,528,530,526,525,522,521,518,512,516,513,514,501,499,493,499,494,489,484,485,489,489,490,492,501,496,504,504,499,497,503,503,501,496,495,488,495,489,492,492,491,490,487,490,492,489,485,492,493,495,493,497,492,489,493,491,484,480,483,482,483,477,476,480,483,486,487,493,497,495,494,490,487,492,495,494,500,503,500,501,505,509,507,503,505,499,497,493,500,504,508,512,518,515,519,516,511,504,495,486,480,475,466,454,443,436,432,428,421,407,399,390,380,368,358,351,343,334,328,317,310,296,290,285,277,266,256,250,243,235,229,227,217,200,198,189,180,178,172,162,155,147,134,129,125,121,109,103,102,94,90,87,83,77,75,71,67,65,63,60,57,53,49,43,39,35,31,27,24,21,18,17,15,11,8,8,6,6,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,12,22,37,53,62,73,86,99,108,119,126,141,150,163,167,183,189,203,218,228,242,253,260,271,288,295,297,308,309,318,325,333,346,358,367,378,383,388,391,389,394,399,410,408,411,416,421,431,436,433,435,444,444,447,443,447,447,453,455,459,454,457,460,466,458,461,465,457,465,465,463,469,472,477,479,482,484,482,477,480,480,476,467,471,474,478,478,476,479,486,489,491,494,488,489,499,497,496,489,488,496,503,501,498,496,501,503,509,507,514,514,509,511,512,506,507,504,510,505,501,503,498,490,490,496,499,503,495,502,500,507,511,515,527,526,535,528,519,516,523,512,505,513,508,497,505,505,510,513,508,502,497,497,498,496,503,504,507,511,520,525,521,522,521,520,524,521,525,521,517,522,525,538,528,518,511,510,508,507,509,515,518,509,507,510,519,520,515,519,517,518,520,519,515,515,519,522,521,513,515,511,508,511,504,502,496,487,479,481,483,481,481,482,491,484,475,478,481,497,499,492,500,497,499,503,503,506,508,507,507,510,511,512,510,516,516,518,507,503,505,512,509,505,503,502,497,499,501,506,511,517,516,522,520,517,519,520,516,514,515,516,522,524,524,519,517,516,511,507,508,508,510,507,505,506,506,500,500,505,509,506,510,514,506,506,505,508,507,501,501,496,491,491,493,496,500,494,493,498,498,492,483,470,475,478,484,483,486,478,483,482,483,474,483,491,493,487,476,476,483,484,486,486,488,494,492,482,476,474,473,467,468,470,476,473,468,464,465,467,460,463,475,483,489,488,488,492,488,489,486,484,481,492,489,492,487,478,482,476,485,495,502,503,501,510,514,512,501,498,492,488,491,493,484,487,495,495,493,500,497,500,501,503,499,498,496,496,499,494,489,486,488,488,494,492,496,493,501,491,486,483,489,491,489,494,501,507,498,497,507,509,503,500,502,509,513,515,517,522,520,525,526,524,529,535,537,530,529,526,531,527,524,524,517,512,517,513,519,518,525,519,520,515,517,515,518,516,522,522,522,523,523,523,526,521,518,513,508,503,509,512,509,512,519,518,523,520,517,518,518,522,528,522,518,516,517,518,520,524,518,519,516,518,511,512,513,519,516,514,528,529,523,519,519,514,521,528,522,518,519,...,534,535,524,520,512,511,510,498,505,510,507,510,513,508,498,504,509,509,509,507,518,515,506,507,511,510,510,513,523,525,533,527,524,532,534,543,540,543,539,536,536,537,527,518,519,520,517,512,511,508,510,510,513,521,528,524,525,536,541,548,549,549,539,539,540,546,541,532,525,524,517,516,528,527,529,521,530,533,527,521,519,518,523,530,522,523,521,526,518,508,517,520,525,521,518,514,513,517,523,534,529,523,529,520,517,512,502,493,500,497,500,502,504,506,507,510,508,503,502,502,508,510,499,508,514,511,508,507,505,502,506,508,506,508,506,509,513,512,518,519,513,521,525,521,526,521,525,526,513,520,515,514,516,518,516,516,515,513,511,512,511,506,504,496,490,491,486,471,468,470,471,469,468,473,476,473,480,475,471,471,469,466,469,471,475,480,484,485,485,487,488,490,486,495,497,503,508,512,521,524,530,532,531,539,539,538,529,536,527,526,528,533,536,541,536,548,548,554,566,560,547,542,544,543,542,539,541,547,549,547,546,546,552,542,539,534,528,524,521,519,525,529,534,524,521,537,537,539,535,541,545,551,547,547,551,549,551,549,554,552,559,559,570,557,562,565,561,559,562,561,557,554,555,559,560,565,562,565,560,560,556,560,561,544,541,539,533,525,525,525,522,525,521,524,528,529,531,529,526,526,531,534,541,534,529,531,523,528,521,517,521,517,517,517,514,521,522,520,531,536,532,531,531,529,530,524,528,527,531,536,537,536,538,532,523,522,526,530,539,540,529,534,532,541,540,537,539,541,548,547,542,540,537,527,532,536,543,539,536,532,527,526,528,530,526,527,525,517,517,518,513,516,521,517,515,512,516,519,515,512,524,517,527,529,536,537,533,536,541,536,537,544,545,538,531,525,532,532,530,518,508,502,494,482,474,466,465,454,438,424,411,404,399,391,386,380,372,361,354,346,337,326,313,306,298,289,281,271,266,254,250,245,239,227,223,219,212,204,197,189,184,177,169,164,160,155,146,139,135,131,121,115,102,98,92,84,82,76,70,64,63,57,52,46,40,37,34,31,29,27,23,22,21,21,16,14,11,7,7,5,4,3,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [51]:
def get_val_output(model,weight_path,val_data,test_data,model_num):
    model.load_weights(weight_path)
    val_pred = model.predict(val_data)
    global val_pred_df, submission_data
    #val_pred_df = train_data.iloc[:val_x.shape[0]*val_x.shape[1]].copy()
    for i in range(val_pred.shape[-1]):
        val_pred_df['pred_{}_{}'.format(model_num,i)] = val_pred[:,:,i].reshape(val_pred.shape[0]*val_pred.shape[1]) 
    val_pred_df['pred_{}'.format(model_num)] = val_pred.argmax(axis=-1).reshape(val_pred.shape[0]*val_pred.shape[1])
    
    test_pred = model.predict(test_data)
    for i in range(test_pred.shape[-1]):
        submission_data['pred_{}_{}'.format(model_num,i)] = test_pred[:,:,i].reshape(test_pred.shape[0]*test_pred.shape[1])
    submission_data['pred_{}'.format(model_num)] = test_pred.argmax(axis=-1).reshape(test_pred.shape[0]*test_pred.shape[1])
        

In [66]:
val_pred_df = train_data.iloc[:val_x.shape[0]*val_x.shape[1]].copy()
submission_data = pd.read_csv('../input/gamma-ray-identification/CAX_LogFacies_Submission_File.csv')

In [53]:
val_x1 = train_data[['GR_rate']].values.reshape(num_train_well,max_len,1)[test_index]
val_x2 = train_data[['processed_GR2','GR_rate']].values.reshape(num_train_well,max_len,2)[test_index]
test_x1 = test_data[['GR_rate']].values.reshape(num_test_well,max_len,1)
test_x2 = test_data[['processed_GR2','GR_rate']].values.reshape(num_test_well,max_len,2)


In [56]:
test_model = get_simple_model()
get_val_output(test_model,'../working/weights_simple_lstm_1.hdf5',val_x1,test_x1,1)

ValueError: You are trying to load a weight file containing 2 layers into a model with 3 layers.

In [67]:
test_model = get_simple_cnn_model()
get_val_output(test_model,'../working/weights_simple_lstm_cnn_1.hdf5',val_x1,test_x1,2)

In [68]:
test_model = get_deep_cnn_model()
get_val_output(test_model,'../working/weights_simple_lstm_deep_cnn_1.hdf5',val_x1,test_x1,3)

In [69]:
test_model = get_deep_cnn_lstm_cnn_model()
get_val_output(test_model,'../working/weights_simple_lstm_deep_cnn_lstm_1_50epochs.hdf5',val_x1,test_x1,4)

In [70]:
test_model = get_deep_cnn_model2()
get_val_output(test_model,'../working/weights_simple_lstm_deep_cnn2_1.hdf5',val_x2,test_x2,5)

In [71]:
test_model = get_attn_model2()
get_val_output(test_model,'../working/weights_deep_cnn_lstm_bn_attention_1_best.hdf5',val_x2,test_x2,6)

In [72]:
test_model = get_multout_model()
test_model.load_weights('../working/weights_deep_cnn_lstm_bn_multout_1.hdf5')

model_num = 7
for out in test_model.predict(val_x2):
    for i in range(5):
        val_pred_df['pred_{}_{}'.format(model_num,i)] = out[:,:,i].reshape(out.shape[0]*out.shape[1])
    val_pred_df['pred_{}'.format(model_num)] = out.argmax(axis=-1).reshape(out.shape[0]*out.shape[1])
    model_num += 1
    
model_num = 7
for out in test_model.predict(test_x2):
    for i in range(5):
        submission_data['pred_{}_{}'.format(model_num,i)] = out[:,:,i].reshape(out.shape[0]*out.shape[1])
    submission_data['pred_{}'.format(model_num)] = out.argmax(axis=-1).reshape(out.shape[0]*out.shape[1])
    model_num += 1

In [73]:
test_model = get_deep_lstm_model()
get_val_output(test_model,'../working/weights_very_deep_lstm2_cnn_lstm_1.hdf5',val_x2,test_x2,12)

In [74]:
test_model = get_very_deep_lstm_cnn_bn_model()
get_val_output(test_model,'../working/weights_very_deep_cnn_lstm_bn_1.hdf5',val_x2,test_x2,13)

In [75]:
test_model = get_very_deep_lstm_cnn_bn_model_with_attn()
get_val_output(test_model,'../working/weights_very_deep_cnn_lstm_bn_attn_1.hdf5',val_x2,test_x2,14)

In [76]:
val_pred_df.columns

Index(['row_id', 'well_id', 'GR', 'label', 'processed_GR', 'processed_GR2',
       'GR_rate', 'pred_2_0', 'pred_2_1', 'pred_2_2', 'pred_2_3', 'pred_2_4',
       'pred_2', 'pred_3_0', 'pred_3_1', 'pred_3_2', 'pred_3_3', 'pred_3_4',
       'pred_3', 'pred_4_0', 'pred_4_1', 'pred_4_2', 'pred_4_3', 'pred_4_4',
       'pred_4', 'pred_5_0', 'pred_5_1', 'pred_5_2', 'pred_5_3', 'pred_5_4',
       'pred_5', 'pred_6_0', 'pred_6_1', 'pred_6_2', 'pred_6_3', 'pred_6_4',
       'pred_6', 'pred_7_0', 'pred_7_1', 'pred_7_2', 'pred_7_3', 'pred_7_4',
       'pred_7', 'pred_8_0', 'pred_8_1', 'pred_8_2', 'pred_8_3', 'pred_8_4',
       'pred_8', 'pred_9_0', 'pred_9_1', 'pred_9_2', 'pred_9_3', 'pred_9_4',
       'pred_9', 'pred_10_0', 'pred_10_1', 'pred_10_2', 'pred_10_3',
       'pred_10_4', 'pred_10', 'pred_11_0', 'pred_11_1', 'pred_11_2',
       'pred_11_3', 'pred_11_4', 'pred_11', 'pred_12_0', 'pred_12_1',
       'pred_12_2', 'pred_12_3', 'pred_12_4', 'pred_12', 'pred_13_0',
       'pred_13_1', 'pred_13

In [98]:
val_pred_df.to_csv('../data/ensemble_valdata_new.csv',index=False)
submission_data.to_csv('../data/ensemble_testdata_new.csv',index=False)

In [77]:
val_pred_df[val_pred_df.label != val_pred_df.pred_4][['row_id','well_id','label','pred_2','pred_3','pred_4','pred_5','pred_6', 'pred_7', 'pred_8','pred_9','pred_10','pred_11','pred_12','pred_13','pred_14']].head(50)

Unnamed: 0,row_id,well_id,label,pred_2,pred_3,pred_4,pred_5,pred_6,pred_7,pred_8,pred_9,pred_10,pred_11,pred_12,pred_13,pred_14
315,315,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
361,361,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0
362,362,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0
363,363,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
460,460,0,3,0,0,0,3,3,0,3,3,3,3,0,3,3
461,461,0,3,0,0,0,0,3,0,3,3,3,3,0,0,3
593,593,0,4,2,2,2,2,2,1,2,2,2,2,2,2,2
594,594,0,4,2,2,2,2,2,2,2,2,2,2,2,2,2
595,595,0,4,2,2,2,2,2,2,2,2,2,2,2,2,2
596,596,0,4,2,2,2,2,2,2,2,2,2,2,2,2,2


In [78]:
val_pred_df[['label','pred_2','pred_3','pred_4','pred_5','pred_6', 'pred_7', 'pred_8','pred_9','pred_10','pred_11','pred_12','pred_13','pred_14']].corr()

Unnamed: 0,label,pred_2,pred_3,pred_4,pred_5,pred_6,pred_7,pred_8,pred_9,pred_10,pred_11,pred_12,pred_13,pred_14
label,1.0,0.926059,0.927152,0.943949,0.937754,0.934296,0.517419,0.909493,0.913888,0.922529,0.922876,0.941711,0.950285,0.950006
pred_2,0.926059,1.0,0.964522,0.947887,0.950083,0.943498,0.528181,0.928123,0.928178,0.93435,0.934784,0.942213,0.941148,0.940763
pred_3,0.927152,0.964522,1.0,0.950516,0.951223,0.945781,0.523588,0.932102,0.93095,0.937051,0.936982,0.945238,0.943595,0.943042
pred_4,0.943949,0.947887,0.950516,1.0,0.958393,0.954035,0.518793,0.928089,0.933038,0.94152,0.942169,0.960525,0.962703,0.961837
pred_5,0.937754,0.950083,0.951223,0.958393,1.0,0.958877,0.523711,0.937807,0.941431,0.947379,0.947883,0.957702,0.957659,0.957214
pred_6,0.934296,0.943498,0.945781,0.954035,0.958877,1.0,0.515474,0.939842,0.950413,0.958179,0.959545,0.953299,0.954806,0.954995
pred_7,0.517419,0.528181,0.523588,0.518793,0.523711,0.515474,1.0,0.512569,0.502633,0.508089,0.507558,0.518796,0.518447,0.51799
pred_8,0.909493,0.928123,0.932102,0.928089,0.937807,0.939842,0.512569,1.0,0.952419,0.946973,0.945378,0.928515,0.923442,0.923082
pred_9,0.913888,0.928178,0.93095,0.933038,0.941431,0.950413,0.502633,0.952419,1.0,0.975276,0.973969,0.933187,0.93074,0.931131
pred_10,0.922529,0.93435,0.937051,0.94152,0.947379,0.958179,0.508089,0.946973,0.975276,1.0,0.985227,0.941624,0.940625,0.941041


In [79]:
val_pred_df[val_pred_df.label != val_pred_df.pred_12][val_pred_df.label != val_pred_df.pred_13][val_pred_df.label != val_pred_df.pred_14][val_pred_df.label != val_pred_df.pred_2][val_pred_df.label != val_pred_df.pred_3][val_pred_df.label != val_pred_df.pred_4][val_pred_df.label != val_pred_df.pred_5][val_pred_df.label != val_pred_df.pred_6][val_pred_df.label != val_pred_df.pred_7][val_pred_df.label != val_pred_df.pred_8][val_pred_df.label != val_pred_df.pred_9][val_pred_df.label != val_pred_df.pred_10][val_pred_df.label != val_pred_df.pred_11].label.value_counts()

  """Entry point for launching an IPython kernel.


3    2652
4    2479
1    2181
2     762
0     157
Name: label, dtype: int64

In [80]:
1 - val_pred_df[val_pred_df.label != val_pred_df.pred_12][val_pred_df.label != val_pred_df.pred_13][val_pred_df.label != val_pred_df.pred_14][val_pred_df.label != val_pred_df.pred_2][val_pred_df.label != val_pred_df.pred_3][val_pred_df.label != val_pred_df.pred_4][val_pred_df.label != val_pred_df.pred_5][val_pred_df.label != val_pred_df.pred_6][val_pred_df.label != val_pred_df.pred_7][val_pred_df.label != val_pred_df.pred_8][val_pred_df.label != val_pred_df.pred_9][val_pred_df.label != val_pred_df.pred_10][val_pred_df.label != val_pred_df.pred_11].label.value_counts().shape[0]*1.0/val_pred_df.shape[0]

  """Entry point for launching an IPython kernel.


0.9999943181818182

In [95]:
submission_data['processed_GR'] = test_data.processed_GR
submission_data['processed_GR2'] = test_data.processed_GR2
submission_data['GR_rate'] = test_data.GR_rate
submission_data['GR'] = test_data.GR
submission_data['well_id'] = test_data.well_id
submission_data['row_id'] = test_data.row_id

In [83]:
for col in ['pred_2','pred_3','pred_4','pred_5','pred_6', 'pred_7', 'pred_8','pred_9','pred_10','pred_11','pred_12','pred_13','pred_14']:
    val_pred_df["{}_shift".format(col)] = val_pred_df.groupby('well_id')[col].shift(1)
    val_pred_df["{}_shift".format(col)] = val_pred_df["{}_shift".format(col)].fillna(method='bfill')
    
    submission_data["{}_shift".format(col)] = submission_data.groupby('well_id')[col].shift(1)
    submission_data["{}_shift".format(col)] = submission_data["{}_shift".format(col)].fillna(method='bfill')

In [84]:
for col in ['processed_GR','processed_GR2','GR_rate']:
    for shift in [11,22,55,110]:
        val_pred_df["rolling_{}_mean_{}".format(col,shift)] = val_pred_df[col].rolling(shift).mean()
        val_pred_df["rolling_{}_std_{}".format(col,shift)] = val_pred_df[col].rolling(shift).std()
        val_pred_df["rolling_{}_mean_{}".format(col,shift)] = val_pred_df["rolling_{}_mean_{}".format(col,shift)].fillna(method='bfill')
        val_pred_df["rolling_{}_std_{}".format(col,shift)] = val_pred_df["rolling_{}_std_{}".format(col,shift)].fillna(method='bfill')

In [85]:
for col in ['processed_GR','processed_GR2','GR_rate']:
    for shift in [11,22,55,110]:
        submission_data["rolling_{}_mean_{}".format(col,shift)] = submission_data[col].rolling(shift).mean()
        submission_data["rolling_{}_std_{}".format(col,shift)] = submission_data[col].rolling(shift).std()
        submission_data["rolling_{}_mean_{}".format(col,shift)] = submission_data["rolling_{}_mean_{}".format(col,shift)].fillna(method='bfill')
        submission_data["rolling_{}_std_{}".format(col,shift)] = submission_data["rolling_{}_std_{}".format(col,shift)].fillna(method='bfill')

In [86]:
list(val_pred_df.columns)

['row_id',
 'well_id',
 'GR',
 'label',
 'processed_GR',
 'processed_GR2',
 'GR_rate',
 'pred_2_0',
 'pred_2_1',
 'pred_2_2',
 'pred_2_3',
 'pred_2_4',
 'pred_2',
 'pred_3_0',
 'pred_3_1',
 'pred_3_2',
 'pred_3_3',
 'pred_3_4',
 'pred_3',
 'pred_4_0',
 'pred_4_1',
 'pred_4_2',
 'pred_4_3',
 'pred_4_4',
 'pred_4',
 'pred_5_0',
 'pred_5_1',
 'pred_5_2',
 'pred_5_3',
 'pred_5_4',
 'pred_5',
 'pred_6_0',
 'pred_6_1',
 'pred_6_2',
 'pred_6_3',
 'pred_6_4',
 'pred_6',
 'pred_7_0',
 'pred_7_1',
 'pred_7_2',
 'pred_7_3',
 'pred_7_4',
 'pred_7',
 'pred_8_0',
 'pred_8_1',
 'pred_8_2',
 'pred_8_3',
 'pred_8_4',
 'pred_8',
 'pred_9_0',
 'pred_9_1',
 'pred_9_2',
 'pred_9_3',
 'pred_9_4',
 'pred_9',
 'pred_10_0',
 'pred_10_1',
 'pred_10_2',
 'pred_10_3',
 'pred_10_4',
 'pred_10',
 'pred_11_0',
 'pred_11_1',
 'pred_11_2',
 'pred_11_3',
 'pred_11_4',
 'pred_11',
 'pred_12_0',
 'pred_12_1',
 'pred_12_2',
 'pred_12_3',
 'pred_12_4',
 'pred_12',
 'pred_13_0',
 'pred_13_1',
 'pred_13_2',
 'pred_13_3',
 'p

In [87]:
traincols = ['GR',
 'processed_GR',
 'processed_GR2',
 'GR_rate',
 'pred_2_0',
 'pred_2_1',
 'pred_2_2',
 'pred_2_3',
 'pred_2_4',
 'pred_2',
 'pred_3_0',
 'pred_3_1',
 'pred_3_2',
 'pred_3_3',
 'pred_3_4',
 'pred_3',
 'pred_4_0',
 'pred_4_1',
 'pred_4_2',
 'pred_4_3',
 'pred_4_4',
 'pred_4',
 'pred_5_0',
 'pred_5_1',
 'pred_5_2',
 'pred_5_3',
 'pred_5_4',
 'pred_5',
 'pred_6_0',
 'pred_6_1',
 'pred_6_2',
 'pred_6_3',
 'pred_6_4',
 'pred_6',
 'pred_7_0',
 'pred_7_1',
 'pred_7_2',
 'pred_7_3',
 'pred_7_4',
 'pred_7',
 'pred_8_0',
 'pred_8_1',
 'pred_8_2',
 'pred_8_3',
 'pred_8_4',
 'pred_8',
 'pred_9_0',
 'pred_9_1',
 'pred_9_2',
 'pred_9_3',
 'pred_9_4',
 'pred_9',
 'pred_10_0',
 'pred_10_1',
 'pred_10_2',
 'pred_10_3',
 'pred_10_4',
 'pred_10',
 'pred_11_0',
 'pred_11_1',
 'pred_11_2',
 'pred_11_3',
 'pred_11_4',
 'pred_11',
 'pred_12_0',
 'pred_12_1',
 'pred_12_2',
 'pred_12_3',
 'pred_12_4',
 'pred_12',
 'pred_13_0',
 'pred_13_1',
 'pred_13_2',
 'pred_13_3',
 'pred_13_4',
 'pred_13',
 'pred_14_0',
 'pred_14_1',
 'pred_14_2',
 'pred_14_3',
 'pred_14_4',
 'pred_14',
 'pred_2_shift',
 'pred_3_shift',
 'pred_4_shift',
 'pred_5_shift',
 'pred_6_shift',
 'pred_7_shift',
 'pred_8_shift',
 'pred_9_shift',
 'pred_10_shift',
 'pred_11_shift',
 'pred_12_shift',
 'pred_13_shift',
 'pred_14_shift',
 'rolling_processed_GR_mean_11',
 'rolling_processed_GR_std_11',
 'rolling_processed_GR_mean_22',
 'rolling_processed_GR_std_22',
 'rolling_processed_GR_mean_55',
 'rolling_processed_GR_std_55',
 'rolling_processed_GR_mean_110',
 'rolling_processed_GR_std_110',
 'rolling_processed_GR2_mean_11',
 'rolling_processed_GR2_std_11',
 'rolling_processed_GR2_mean_22',
 'rolling_processed_GR2_std_22',
 'rolling_processed_GR2_mean_55',
 'rolling_processed_GR2_std_55',
 'rolling_processed_GR2_mean_110',
 'rolling_processed_GR2_std_110',
 'rolling_GR_rate_mean_11',
 'rolling_GR_rate_std_11',
 'rolling_GR_rate_mean_22',
 'rolling_GR_rate_std_22',
 'rolling_GR_rate_mean_55',
 'rolling_GR_rate_std_55',
 'rolling_GR_rate_mean_110',
 'rolling_GR_rate_std_110']

In [88]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import cross_val_score

In [89]:
for i in range(2,15):
    print ("Val accuracy of model {} is {}".format(i,accuracy_score(val_pred_df.label,val_pred_df['pred_{}'.format(i)])))
    #print ("Val accuracy of shift model {} is {}".format(i,accuracy_score(val_pred_df.label,val_pred_df['pred_{}_shift'.format(i)])))

Val accuracy of model 2 is 0.9521670454545454
Val accuracy of model 3 is 0.9530272727272727
Val accuracy of model 4 is 0.9630431818181818
Val accuracy of model 5 is 0.9598386363636363
Val accuracy of model 6 is 0.9574045454545455
Val accuracy of model 7 is 0.6769477272727272
Val accuracy of model 8 is 0.9431840909090909
Val accuracy of model 9 is 0.9466284090909091
Val accuracy of model 10 is 0.9507465909090909
Val accuracy of model 11 is 0.9508806818181819
Val accuracy of model 12 is 0.9616397727272727
Val accuracy of model 13 is 0.9670409090909091
Val accuracy of model 14 is 0.9666784090909091


In [90]:
cross_val_score(X=val_pred_df[traincols],y=val_pred_df.label,cv=5,estimator=LogisticRegression(),groups=val_pred_df.well_id)



array([0.96835813, 0.9682445 , 0.96514773, 0.96665322, 0.97127256])

In [91]:
np.mean([0.96835813, 0.9682445 , 0.96514773, 0.96665322, 0.97127256])

0.967935228

In [146]:
from sklearn import tree

In [147]:
dt = tree.DecisionTreeClassifier()
dt.fit(val_pred_df[traincols],val_pred_df.label)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [156]:
val_pred_df.shape

(880000, 73)

In [216]:
dt2 = tree.DecisionTreeClassifier(max_depth=10)
dt2.fit(val_pred_df.iloc[:660000][traincols],val_pred_df.iloc[:660000].label)
accuracy_score(val_pred_df.iloc[660000:].label,dt2.predict(val_pred_df.iloc[660000:][traincols]))

0.9565454545454546

In [176]:
accuracy_score(val_pred_df.iloc[:660000].label,dt2.predict(val_pred_df.iloc[:660000][traincols]))

0.9682409090909091

In [159]:
from sklearn.naive_bayes import GaussianNB, MultinomialNB

In [177]:
?LogisticRegression

In [92]:
#nb = GaussianNB()
nb = LogisticRegression()
nb.fit(val_pred_df.iloc[:660000][traincols],val_pred_df.iloc[:660000].label)
accuracy_score(val_pred_df.iloc[660000:].label,nb.predict(val_pred_df.iloc[660000:][traincols]))



0.9702409090909091

In [93]:
nb.coef_[0]

array([-2.37747323e-03,  1.62222493e-01,  1.90364422e-01, -7.44589882e-02,
        7.90593680e-01, -4.73565779e-01, -4.23072660e-01, -9.00958530e-01,
       -1.32672442e+00,  1.62440802e-01,  4.18582122e-01, -5.79050185e-01,
       -7.12914131e-01, -7.66623558e-01, -6.93722123e-01,  2.03601835e-01,
        1.85696642e+00, -8.64482832e-01, -1.03434266e+00, -8.29814462e-01,
       -1.46205445e+00,  2.98222513e-01,  2.28501445e-01, -8.46764925e-01,
       -1.14824239e+00, -5.46546432e-01, -2.06756246e-02,  1.73995862e-01,
       -7.26335541e-02, -3.71536471e-01, -5.51654141e-01, -5.41618166e-01,
       -7.96285844e-01,  1.33827666e-01,  1.94358091e-01, -1.23712178e-02,
       -2.51240538e+00,  7.80801783e-01, -7.84111085e-01, -8.45709019e-02,
       -5.64282719e-02,  3.18487009e-01, -8.51535311e-01, -8.03927850e-01,
       -9.40323059e-01,  1.36159812e-01,  1.29453987e-01, -1.35365751e-01,
       -3.65688128e-01, -1.52602460e+00, -4.36102753e-01,  9.80379565e-02,
       -1.17463508e+00, -

In [96]:
#nb.fit(val_pred_df[traincols],val_pred_df.label)
submission_ensemble = submission_data[['unique_id','label']].copy()
submission_ensemble['label'] = nb.predict(submission_data[traincols])
submission_ensemble.head(10)

Unnamed: 0,unique_id,label
0,CAX_0,0
1,CAX_1,0
2,CAX_2,0
3,CAX_3,0
4,CAX_4,0
5,CAX_5,0
6,CAX_6,0
7,CAX_7,0
8,CAX_8,0
9,CAX_9,0


In [208]:
val_pred_df['max_label'] = val_pred_df[['pred_1','pred_2','pred_3','pred_4','pred_5','pred_6', 'pred_7', 'pred_8','pred_9','pred_10','pred_11']].mode(axis=1)




KeyboardInterrupt: 

In [203]:
val_pred_df[['pred_1','pred_2','pred_3','pred_4','pred_5','pred_6', 'pred_7', 'pred_8','pred_9','pred_10','pred_11']].max(axis=1).head()

0    0
1    0
2    0
3    0
4    0
dtype: int64

In [207]:
accuracy_score(val_pred_df.label,val_pred_df.max_label)

0.9589613636363636

In [97]:
submission_ensemble.to_csv('../data/submission14_ensemble.csv',index=False)

In [153]:
def print_decision_tree(tree, feature_names=None, offset_unit='    '):
    '''Plots textual representation of rules of a decision tree
    tree: scikit-learn representation of tree
    feature_names: list of feature names. They are set to f1,f2,f3,... if not specified
    offset_unit: a string of offset of the conditional block'''

    left      = tree.tree_.children_left
    right     = tree.tree_.children_right
    threshold = tree.tree_.threshold
    value = tree.tree_.value
    if feature_names is None:
        features  = ['f%d'%i for i in tree.tree_.feature]
    else:
        features  = [feature_names[i] for i in tree.tree_.feature]        

    def recurse(left, right, threshold, features, node, depth=0):
            offset = offset_unit*depth
            if (threshold[node] != -2):
                    print(offset+"if ( " + features[node] + " <= " + str(threshold[node]) + " ) {")
                    if left[node] != -1:
                            recurse (left, right, threshold, features,left[node],depth+1)
                    print(offset+"} else {")
                    if right[node] != -1:
                            recurse (left, right, threshold, features,right[node],depth+1)
                    print(offset+"}")
            else:
                    print(offset+"return " + str(value[node]))

    recurse(left, right, threshold, features, 0,0)

In [155]:
accuracy_score(val_pred_df.label,dt.predict(val_pred_df[traincols]))

1.0

In [154]:
print_decision_tree(dt,traincols)

if ( pred_4_0 <= 0.5233772099018097 ) {
    if ( pred_4_4 <= 0.4478372633457184 ) {
        if ( pred_4_2 <= 0.4538211226463318 ) {
            if ( pred_4_3 <= 0.3039254695177078 ) {
                if ( pred_4_1 <= 0.8839332461357117 ) {
                    if ( pred_5_0 <= 0.20386961102485657 ) {
                        if ( pred_6_2 <= 0.14233114570379257 ) {
                            if ( pred_5_3 <= 0.26921169459819794 ) {
                                if ( pred_6_4 <= 0.049533963203430176 ) {
                                    if ( pred_4_0 <= 1.5170796245911333e-06 ) {
                                        if ( pred_5_0 <= 2.761379505500372e-06 ) {
                                            return [[ 0.  0. 39.  0.  0.]]
                                        } else {
                                            if ( pred_3_0 <= 7.132932296372019e-05 ) {
                                                return [[ 0. 15.  0.  0.  0.]]
                                      

                                                                                    }
                                                                                } else {
                                                                                    return [[0. 0. 0. 2. 0.]]
                                                                                }
                                                                            } else {
                                                                                if ( pred_4_3 <= 0.08135317638516426 ) {
                                                                                    return [[0. 0. 1. 0. 0.]]
                                                                                } else {
                                                                                    return [[ 0. 29.  0.  0.  0.]]
                                                                                }
                               

                                                                        }
                                                                    }
                                                                } else {
                                                                    return [[0. 3. 0. 0. 0.]]
                                                                }
                                                            }
                                                        } else {
                                                            if ( pred_8_1 <= 0.28897346556186676 ) {
                                                                if ( pred_3_3 <= 0.8787994086742401 ) {
                                                                    if ( pred_1_1 <= 0.02785965148359537 ) {
                                                                        return [[0. 0. 0. 1. 0.]]
                                                                    } else {
       

                                                }
                                            } else {
                                                if ( pred_6_0 <= 0.0061444437596946955 ) {
                                                    if ( pred_2_0 <= 0.12136014923453331 ) {
                                                        if ( pred_3_3 <= 0.10287079587578773 ) {
                                                            if ( pred_2_0 <= 0.016459620790556073 ) {
                                                                return [[0. 5. 0. 0. 0.]]
                                                            } else {
                                                                return [[0. 0. 2. 0. 0.]]
                                                            }
                                                        } else {
                                                            if ( pred_1_3 <= 0.6666736602783203 ) {
                                      

                                                                    return [[0. 4. 0. 0. 0.]]
                                                                } else {
                                                                    return [[0. 0. 0. 1. 0.]]
                                                                }
                                                            } else {
                                                                return [[0. 0. 0. 8. 0.]]
                                                            }
                                                        }
                                                    } else {
                                                        if ( pred_5_1 <= 0.2408333495259285 ) {
                                                            if ( pred_4_4 <= 0.0004097555502085015 ) {
                                                                if ( pred_11_2 <= 0.0013892544811824337 ) {
                          

                            if ( pred_10_3 <= 0.34989093244075775 ) {
                                if ( pred_8_2 <= 0.23258210718631744 ) {
                                    if ( pred_6_1 <= 0.9400925040245056 ) {
                                        if ( pred_11_2 <= 0.08315405994653702 ) {
                                            if ( pred_7_4 <= 0.03937716968357563 ) {
                                                if ( pred_6_1 <= 0.03616506326943636 ) {
                                                    return [[0. 0. 0. 0. 1.]]
                                                } else {
                                                    return [[3. 0. 0. 0. 0.]]
                                                }
                                            } else {
                                                if ( pred_11_4 <= 0.000366305626812391 ) {
                                                    return [[0. 0. 3. 0. 0.]]
                                        

                                                                                                            if ( pred_10_0 <= 0.8401418030261993 ) {
                                                                                                                return [[2. 0. 0. 0. 0.]]
                                                                                                            } else {
                                                                                                                return [[0. 7. 0. 0. 0.]]
                                                                                                            }
                                                                                                        }
                                                                                                    }
                                                                                                } else {
                                   

                                                }
                                            } else {
                                                if ( pred_10_1 <= 0.9977299273014069 ) {
                                                    return [[ 0. 13.  0.  0.  0.]]
                                                } else {
                                                    if ( pred_7_3 <= 0.06482625938951969 ) {
                                                        return [[0. 1. 0. 0. 0.]]
                                                    } else {
                                                        return [[0. 0. 4. 0. 0.]]
                                                    }
                                                }
                                            }
                                        } else {
                                            if ( pred_6_4 <= 0.28326040506362915 ) {
                                                if ( pred_7_3 <= 0

                                                                                                                            } else {
                                                                                                                                if ( pred_6_4 <= 0.09284399636089802 ) {
                                                                                                                                    return [[0. 5. 0. 0. 0.]]
                                                                                                                                } else {
                                                                                                                                    return [[0. 0. 1. 0. 0.]]
                                                                                                                                }
                                                                                                                   

                                                                                                    return [[ 0. 38.  0.  0.  0.]]
                                                                                                }
                                                                                            } else {
                                                                                                return [[0. 0. 0. 1. 0.]]
                                                                                            }
                                                                                        }
                                                                                    } else {
                                                                                        if ( pred_10_0 <= 0.0006419044948415831 ) {
                                                                                            return [[0. 7. 0. 0. 0.]]
                     

                                                                        if ( pred_1_0 <= 9.039858923642896e-06 ) {
                                                                            if ( pred_10_0 <= 5.124288691149559e-05 ) {
                                                                                return [[0. 0. 0. 0. 7.]]
                                                                            } else {
                                                                                if ( pred_7_0 <= 4.866492145083612e-05 ) {
                                                                                    return [[0. 0. 0. 0. 1.]]
                                                                                } else {
                                                                                    return [[0. 0. 0. 5. 0.]]
                                                                                }
                                                            

                                                                                return [[0. 1. 0. 0. 0.]]
                                                                            } else {
                                                                                return [[0. 0. 0. 0. 1.]]
                                                                            }
                                                                        }
                                                                    }
                                                                } else {
                                                                    if ( pred_2_2 <= 0.04538960941135883 ) {
                                                                        return [[0. 0. 0. 3. 0.]]
                                                                    } else {
                                                                        if ( pred_2_0 <= 0.09536449238657951 ) {
           

                                                                                    } else {
                                                                                        return [[ 0.  0.  0. 29.  0.]]
                                                                                    }
                                                                                } else {
                                                                                    if ( pred_11_2 <= 0.014254369307309389 ) {
                                                                                        return [[0. 0. 0. 6. 0.]]
                                                                                    } else {
                                                                                        if ( pred_3_4 <= 0.002909460279624909 ) {
                                                                                            return [[0. 0. 0. 2. 0.]]
                               

                                                            }
                                                        }
                                                    }
                                                }
                                            } else {
                                                if ( pred_3_4 <= 0.00552051211707294 ) {
                                                    if ( pred_2_1 <= 0.004722510930150747 ) {
                                                        if ( pred_5_1 <= 0.0005636375135509297 ) {
                                                            return [[0. 0. 0. 1. 0.]]
                                                        } else {
                                                            if ( pred_5_2 <= 1.996399078052491e-05 ) {
                                                                return [[0. 0. 0. 1. 0.]]
                                                            } else {
                            

                                                        if ( pred_8_0 <= 0.07775792479515076 ) {
                                                            if ( pred_11_4 <= 0.0004204483557259664 ) {
                                                                return [[0. 0. 0. 4. 0.]]
                                                            } else {
                                                                if ( pred_3_4 <= 0.13092108070850372 ) {
                                                                    if ( pred_4_4 <= 0.001303691475186497 ) {
                                                                        if ( pred_6_4 <= 0.008919531013816595 ) {
                                                                            return [[2. 0. 0. 0. 0.]]
                                                                        } else {
                                                                            return [[0. 0. 0. 3. 0.]]
                          

                                                                                return [[0. 0. 0. 1. 0.]]
                                                                            }
                                                                        } else {
                                                                            if ( pred_2_0 <= 0.235363207757473 ) {
                                                                                return [[ 0.  0.  0. 92.  0.]]
                                                                            } else {
                                                                                if ( pred_3_0 <= 0.2521394193172455 ) {
                                                                                    return [[0. 1. 0. 0. 0.]]
                                                                                } else {
                                                                                    return [[0. 0. 0. 2. 

                                                                                                                    return [[0. 1. 0. 0. 0.]]
                                                                                                                }
                                                                                                            } else {
                                                                                                                return [[0. 2. 0. 0. 0.]]
                                                                                                            }
                                                                                                        }
                                                                                                    } else {
                                                                                                        if ( pred_3_3 <= 0.9527070820331573 ) {
                    

                                    }
                                }
                            } else {
                                if ( pred_11_3 <= 0.6252744197845459 ) {
                                    return [[0. 7. 0. 0. 0.]]
                                } else {
                                    return [[0. 0. 0. 3. 0.]]
                                }
                            }
                        }
                    }
                }
            }
        } else {
            if ( pred_4_2 <= 0.8835849165916443 ) {
                if ( pred_6_1 <= 0.21099236607551575 ) {
                    if ( pred_5_4 <= 0.0429900698363781 ) {
                        if ( pred_4_3 <= 0.20941783487796783 ) {
                            if ( pred_6_4 <= 0.01460859039798379 ) {
                                if ( pred_11_2 <= 0.06717346981167793 ) {
                                    if ( pred_2_2 <= 0.07650898396968842 ) {
                                      

                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                } else {
                                    if ( pred_9_3 <= 0.0009031230583786964 ) {
                                        if ( pred_10_1 <= 0.0001884053708636202 ) {
                                            if ( pred_4_0 <= 7.77081800151791e-06 ) {
                                                return [[ 0.  0.  0.  0. 17.]]
                                            } else {
                                                if ( pred_1_0 <= 1.3125964528626355e-06 ) {
                                                    return [[0. 0. 0. 0. 3.]]
                                                } else {
                                                    return [[0.

                                        } else {
                                            return [[0. 0. 1. 0. 0.]]
                                        }
                                    } else {
                                        return [[0. 0. 0. 0. 1.]]
                                    }
                                }
                            } else {
                                if ( pred_8_4 <= 0.00010391738032922149 ) {
                                    return [[0. 9. 0. 0. 0.]]
                                } else {
                                    if ( pred_10_3 <= 0.0876077450811863 ) {
                                        if ( pred_11_3 <= 0.0008169434440787882 ) {
                                            if ( pred_6_1 <= 0.36196622252464294 ) {
                                                if ( pred_7_3 <= 0.107833381742239 ) {
                                                    return [[0. 1. 0. 0. 0.]]
                             

                                            }
                                        } else {
                                            if ( pred_1_3 <= 0.00016150498413480818 ) {
                                                if ( pred_6_2 <= 0.9103286862373352 ) {
                                                    if ( pred_5 <= 3.0 ) {
                                                        return [[ 0.  0.  0.  0. 12.]]
                                                    } else {
                                                        return [[0. 0. 2. 0. 0.]]
                                                    }
                                                } else {
                                                    return [[ 0.  0. 10.  0.  0.]]
                                                }
                                            } else {
                                                if ( pred_8_3 <= 0.1809551641345024 ) {
                                       

                                                    if ( pred_6_1 <= 0.011603695806115866 ) {
                                                        if ( pred_10_4 <= 0.005636036396026611 ) {
                                                            if ( pred_4_3 <= 5.82915818085894e-05 ) {
                                                                if ( pred_8_4 <= 0.00553887477144599 ) {
                                                                    if ( pred_10_4 <= 0.003750772215425968 ) {
                                                                        if ( pred_10_4 <= 0.0014090684708207846 ) {
                                                                            return [[    0.     0. 40164.     0.     0.]]
                                                                        } else {
                                                                            if ( pred_9_1 <= 7.18926967238076e-05 ) {
                                                    

                                                                        return [[1. 0. 0. 0. 0.]]
                                                                    } else {
                                                                        if ( pred_1_4 <= 0.793089896440506 ) {
                                                                            if ( pred_4_4 <= 0.4621013104915619 ) {
                                                                                return [[0. 1. 0. 0. 0.]]
                                                                            } else {
                                                                                if ( pred_3_3 <= 0.010784315411001444 ) {
                                                                                    if ( pred_8_2 <= 0.00020847571431659162 ) {
                                                                                        if ( pred_10_4 <= 0.6273409724235535 ) {
                            

                                                } else {
                                                    if ( pred_2_3 <= 0.0012043709866702557 ) {
                                                        if ( pred_10_2 <= 0.001463003980461508 ) {
                                                            if ( pred_11_4 <= 0.18274858593940735 ) {
                                                                return [[0. 3. 0. 0. 0.]]
                                                            } else {
                                                                return [[ 0.  0.  0.  0. 11.]]
                                                            }
                                                        } else {
                                                            if ( pred_9_1 <= 0.8279911875724792 ) {
                                                                if ( pred_2_4 <= 0.873524010181427 ) {
                                                               

                                                            } else {
                                                                if ( pred_9_4 <= 0.2675819620490074 ) {
                                                                    return [[0. 0. 0. 0. 3.]]
                                                                } else {
                                                                    return [[3. 0. 0. 0. 0.]]
                                                                }
                                                            }
                                                        }
                                                    } else {
                                                        if ( pred_11_2 <= 6.96915085427463e-05 ) {
                                                            if ( pred_3_1 <= 0.00023115005751606077 ) {
                                                                return [[0. 0. 0. 0. 3.]]
                          

                                                                                    return [[ 0.  0.  0.  0. 11.]]
                                                                                }
                                                                            }
                                                                        }
                                                                    } else {
                                                                        if ( pred_5_0 <= 0.3347989171743393 ) {
                                                                            if ( pred_7_1 <= 0.04246523976325989 ) {
                                                                                return [[0. 0. 0. 0. 2.]]
                                                                            } else {
                                                                                if ( pred_1_1 <= 0.1299639195203781 ) {
                                  

                                                                                                                        } else {
                                                                                                                            if ( pred_1_4 <= 0.4508789926767349 ) {
                                                                                                                                return [[0. 1. 0. 0. 0.]]
                                                                                                                            } else {
                                                                                                                                return [[0. 0. 0. 0. 7.]]
                                                                                                                            }
                                                                                                                        }
                  

                                                                        }
                                                                    }
                                                                } else {
                                                                    return [[1. 0. 0. 0. 0.]]
                                                                }
                                                            } else {
                                                                if ( pred_7_3 <= 0.29599514603614807 ) {
                                                                    if ( pred_11_0 <= 0.053110962733626366 ) {
                                                                        return [[0. 1. 0. 0. 0.]]
                                                                    } else {
                                                                        if ( pred_11_3 <= 0.00012800926197087392 ) {
                                              

                                            }
                                        } else {
                                            if ( pred_6_4 <= 0.9299140572547913 ) {
                                                return [[0. 5. 0. 0. 0.]]
                                            } else {
                                                return [[0. 0. 0. 0. 5.]]
                                            }
                                        }
                                    } else {
                                        return [[0. 3. 0. 0. 0.]]
                                    }
                                }
                            } else {
                                if ( pred_5_3 <= 0.005619138944894075 ) {
                                    if ( pred_2_4 <= 0.020364921540021896 ) {
                                        return [[0. 2. 0. 0. 0.]]
                                    } else {
                                        if ( pred

                                                    }
                                                } else {
                                                    if ( pred_6_2 <= 0.00016714831144781783 ) {
                                                        if ( pred_5_2 <= 4.3208099668845534e-05 ) {
                                                            if ( pred_6_2 <= 0.00014258829469326884 ) {
                                                                if ( pred_1_1 <= 0.2552986517548561 ) {
                                                                    return [[1. 0. 0. 0. 0.]]
                                                                } else {
                                                                    return [[0. 7. 0. 0. 0.]]
                                                                }
                                                            } else {
                                                                return [[3. 0. 0. 0. 0.]]

                                                                        } else {
                                                                            return [[6. 0. 0. 0. 0.]]
                                                                        }
                                                                    }
                                                                }
                                                            } else {
                                                                if ( pred_3_0 <= 0.9555685222148895 ) {
                                                                    if ( pred_10_3 <= 0.00028553618176374584 ) {
                                                                        return [[0. 0. 0. 0. 1.]]
                                                                    } else {
                                                                        return [[15.  0.  0.  0.  0.]]
                                           

                                                                                                                } else {
                                                                                                                    return [[36.  0.  0.  0.  0.]]
                                                                                                                }
                                                                                                            }
                                                                                                        }
                                                                                                    }
                                                                                                }
                                                                                            } else {
                                                                                                if ( 

                                                                    if ( pred_9_0 <= 0.8779549300670624 ) {
                                                                        if ( pred_1_1 <= 0.004711187677457929 ) {
                                                                            if ( pred_5_2 <= 0.0003391255741007626 ) {
                                                                                return [[13.  0.  0.  0.  0.]]
                                                                            } else {
                                                                                if ( pred_8_2 <= 0.005980492569506168 ) {
                                                                                    if ( pred_7_0 <= 0.8664487600326538 ) {
                                                                                        return [[0. 0. 0. 0. 9.]]
                                                                                    } else {
          

                                                            }
                                                        } else {
                                                            if ( pred_10_3 <= 0.9624865055084229 ) {
                                                                return [[5. 0. 0. 0. 0.]]
                                                            } else {
                                                                return [[0. 0. 0. 1. 0.]]
                                                            }
                                                        }
                                                    }
                                                } else {
                                                    return [[0. 0. 0. 8. 0.]]
                                                }
                                            }
                                        }
                                    } else {
                               

                                                        if ( pred_11_4 <= 0.00010383338303654455 ) {
                                                            return [[1. 0. 0. 0. 0.]]
                                                        } else {
                                                            if ( pred_4_0 <= 0.5446065068244934 ) {
                                                                return [[1. 0. 0. 0. 0.]]
                                                            } else {
                                                                if ( pred_8_4 <= 0.00018904438911704347 ) {
                                                                    return [[1. 0. 0. 0. 0.]]
                                                                } else {
                                                                    if ( pred_3_2 <= 0.0002563003799878061 ) {
                                                                        return [[ 0.  0.  0. 32.  0.]]


                                                        if ( pred_9_2 <= 0.0003504219785099849 ) {
                                                            return [[23.  0.  0.  0.  0.]]
                                                        } else {
                                                            return [[0. 0. 0. 1. 0.]]
                                                        }
                                                    } else {
                                                        if ( pred_11_0 <= 0.9404525458812714 ) {
                                                            if ( pred_9_3 <= 0.14748778194189072 ) {
                                                                if ( pred_8_3 <= 0.011275526136159897 ) {
                                                                    return [[3. 0. 0. 0. 0.]]
                                                                } else {
                                                                    r

                                                    }
                                                }
                                            } else {
                                                if ( pred_10_4 <= 9.695997141534463e-05 ) {
                                                    return [[0. 0. 0. 2. 0.]]
                                                } else {
                                                    if ( pred_8_2 <= 0.0016896916786208749 ) {
                                                        if ( pred_11_2 <= 0.0019400809542275965 ) {
                                                            if ( pred_8_1 <= 0.13243171945214272 ) {
                                                                if ( pred_5_4 <= 1.0642096185620176e-05 ) {
                                                                    return [[0. 0. 0. 1. 0.]]
                                                                } else {
                                             

                                                                                                                                            } else {
                                                                                                                                                return [[0. 0. 0. 0. 1.]]
                                                                                                                                            }
                                                                                                                                        } else {
                                                                                                                                            if ( pred_4_3 <= 0.015458435285836458 ) {
                                                                                                                                                if ( pred_11_1 <= 0.003062513889744878 ) {
                         

                                                                                                                        } else {
                                                                                                                            if ( pred_3_2 <= 0.00011620392251643352 ) {
                                                                                                                                return [[0. 1. 0. 0. 0.]]
                                                                                                                            } else {
                                                                                                                                return [[1. 0. 0. 0. 0.]]
                                                                                                                            }
                                                                                                                        }
              

                                                                                                                        return [[0. 0. 0. 1. 0.]]
                                                                                                                    } else {
                                                                                                                        return [[4. 0. 0. 0. 0.]]
                                                                                                                    }
                                                                                                                } else {
                                                                                                                    if ( pred_6_1 <= 0.001263428304810077 ) {
                                                                                                                        if ( pred_9_4 <= 0.005835391115397215 ) {
                        

                                                                                    }
                                                                                } else {
                                                                                    if ( pred_6_4 <= 0.013272557873278856 ) {
                                                                                        if ( pred_6_4 <= 0.013256094418466091 ) {
                                                                                            if ( pred_3_2 <= 8.310620614793152e-05 ) {
                                                                                                return [[65.  0.  0.  0.  0.]]
                                                                                            } else {
                                                                                                if ( pred_7_2 <= 0.0020707903895527124 ) {
                                                                   

                                                                                                    } else {
                                                                                                        if ( pred_8_2 <= 0.0002646654102136381 ) {
                                                                                                            return [[0. 0. 0. 0. 1.]]
                                                                                                        } else {
                                                                                                            return [[4. 0. 0. 0. 0.]]
                                                                                                        }
                                                                                                    }
                                                                                                }
                                                         

                                                                                } else {
                                                                                    return [[0. 0. 0. 1. 0.]]
                                                                                }
                                                                            }
                                                                        } else {
                                                                            if ( pred_4_2 <= 9.571057944413042e-06 ) {
                                                                                if ( pred_9_1 <= 0.00025217993243131787 ) {
                                                                                    if ( pred_9_1 <= 0.0002519727568142116 ) {
                                                                                        if ( pred_7_4 <= 0.00835098186507821 ) {
                                                             

In [151]:
import graphviz 
dot_data = tree.export_graphviz(dt, out_file=None, 
                      feature_names=traincols,  
                      filled=True, rounded=True,  
                      special_characters=True)  
graph = graphviz.Source(dot_data)  
graph 

KeyboardInterrupt: 

In [112]:
cross_val_score(X=val_pred_df[traincols],y=val_pred_df.label,cv=5,estimator=RandomForestClassifier(n_estimators=100,random_state=123),groups=val_pred_df.well_id)



array([0.96556838, 0.96559679, 0.96108523, 0.96296002, 0.96761913])

In [113]:
np.mean([0.96556838, 0.96559679, 0.96108523, 0.96296002, 0.96761913])

0.9645659099999999

In [122]:
cross_val_score(X=val_pred_df[traincols],y=val_pred_df.label,cv=5,estimator=GradientBoostingClassifier(learning_rate=.1,n_estimators=100,random_state=123),groups=val_pred_df.well_id)


array([0.96582406, 0.96534111, 0.96124432, 0.96254524, 0.96812482])

In [124]:
np.mean([0.96582406, 0.96534111, 0.96124432, 0.96254524, 0.96812482])

0.96461591

In [99]:
import xgboost as xgb
import lightgbm as lgb

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [100]:
clf = xgb.XGBClassifier(max_depth=31,n_estimators=200,objective='multi:softprob')

In [130]:
cross_val_score(X=val_pred_df[traincols],y=val_pred_df.label,cv=5,estimator=clf,groups=val_pred_df.well_id)


array([0.96547179, 0.96502293, 0.96113636, 0.96219865, 0.96681799])

In [131]:
np.mean([0.96547179, 0.96502293, 0.96113636, 0.96219865, 0.96681799])

0.9641295440000001

In [101]:
#nb = GaussianNB()
#nb = LogisticRegression()
clf.fit(val_pred_df.iloc[:660000][traincols],val_pred_df.iloc[:660000].label)
accuracy_score(val_pred_df.iloc[660000:].label,clf.predict(val_pred_df.iloc[660000:][traincols]))

KeyboardInterrupt: 

In [147]:
traincols2 = [
 'processed_GR2',
 'GR_rate',
 'pred_2_0',
 'pred_2_1',
 'pred_2_2',
 'pred_2_3',
 'pred_2_4',
 'pred_3_0',
 'pred_3_1',
 'pred_3_2',
 'pred_3_3',
 'pred_3_4',
 'pred_4_0',
 'pred_4_1',
 'pred_4_2',
 'pred_4_3',
 'pred_4_4',
 'pred_5_0',
 'pred_5_1',
 'pred_5_2',
 'pred_5_3',
 'pred_5_4',
 'pred_6_0',
 'pred_6_1',
 'pred_6_2',
 'pred_6_3',
 'pred_6_4',
 'pred_7_0',
 'pred_7_1',
 'pred_7_2',
 'pred_7_3',
 'pred_7_4',
 'pred_8_0',
 'pred_8_1',
 'pred_8_2',
 'pred_8_3',
 'pred_8_4',
 'pred_9_0',
 'pred_9_1',
 'pred_9_2',
 'pred_9_3',
 'pred_9_4',
 'pred_10_0',
 'pred_10_1',
 'pred_10_2',
 'pred_10_3',
 'pred_10_4',
 'pred_11_0',
 'pred_11_1',
 'pred_11_2',
 'pred_11_3',
 'pred_11_4',
 'pred_12_0',
 'pred_12_1',
 'pred_12_2',
 'pred_12_3',
 'pred_12_4',
 'pred_13_0',
 'pred_13_1',
 'pred_13_2',
 'pred_13_3',
 'pred_13_4',
 'pred_14_0',
 'pred_14_1',
 'pred_14_2',
 'pred_14_3',
 'pred_14_4']

In [154]:
def get_simple_ensemble_model():
    model = Sequential()
    model.add(InputLayer(input_shape=(max_len,len(traincols2))))
    model.add(Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2, return_sequences = True)))
    model.add(Dense(200,activation='relu'))
    model.add(Dense(200,activation='relu'))
    model.add(Dense(200,activation='relu'))
    #model.add(SeqSelfAttention(attention_width=50,attention_activation='sigmoid',name='Attention'))
    model.add(Dense(n_output,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer="nadam",metrics = ['accuracy'])
    #print(model.summary())
    return model

In [152]:
model = get_simple_ensemble_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_77 (Bidirectio (None, 1100, 200)         134400    
_________________________________________________________________
dense_95 (Dense)             (None, 1100, 200)         40200     
_________________________________________________________________
dense_96 (Dense)             (None, 1100, 200)         40200     
_________________________________________________________________
dense_97 (Dense)             (None, 1100, 200)         40200     
_________________________________________________________________
dense_98 (Dense)             (None, 1100, 5)           1005      
Total params: 256,005
Trainable params: 256,005
Non-trainable params: 0
_________________________________________________________________


In [123]:
val_pred_df.label.value_counts(normalize=True)

0    0.524042
4    0.122483
3    0.119023
2    0.118386
1    0.116066
Name: label, dtype: float64

In [157]:
from keras.optimizers import Adam, Nadam

In [148]:
val_pred_df_x = val_pred_df[traincols2].values.reshape(val_pred_df.well_id.nunique(),max_len,len(traincols2))
val_pred_df_y = pd.get_dummies(val_pred_df.label).values.reshape(val_pred_df.well_id.nunique(),max_len,5)

print (val_pred_df_x.shape, val_pred_df_y.shape)

(800, 1100, 67) (800, 1100, 5)


In [120]:
def focal_loss(gamma=2., alpha=4.):

    gamma = float(gamma)
    alpha = float(alpha)

    def focal_loss_fixed(y_true, y_pred):
        """Focal loss for multi-classification
        FL(p_t)=-alpha(1-p_t)^{gamma}ln(p_t)
        Notice: y_pred is probability after softmax
        gradient is d(Fl)/d(p_t) not d(Fl)/d(x) as described in paper
        d(Fl)/d(p_t) * [p_t(1-p_t)] = d(Fl)/d(x)
        Focal Loss for Dense Object Detection
        https://arxiv.org/abs/1708.02002

        Arguments:
            y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls]
            y_pred {tensor} -- model's output, shape of [batch_size, num_cls]

        Keyword Arguments:
            gamma {float} -- (default: {2.0})
            alpha {float} -- (default: {4.0})

        Returns:
            [tensor] -- loss.
        """
        epsilon = 1.e-9
        y_true = tf.convert_to_tensor(y_true, tf.float32)
        y_pred = tf.convert_to_tensor(y_pred, tf.float32)

        model_out = tf.add(y_pred, epsilon)
        ce = tf.multiply(y_true, -tf.log(model_out))
        weight = tf.multiply(y_true, tf.pow(tf.subtract(1., model_out), gamma))
        fl = tf.multiply(alpha, tf.multiply(weight, ce))
        reduced_fl = tf.reduce_max(fl, axis=1)
        return tf.reduce_mean(reduced_fl)
    return focal_loss_fixed

In [119]:
iter = 0
n_epochs = 50
all_history = {}

for train_index, test_index in KFold(n_splits=5).split(val_pred_df_x,val_pred_df_y):
    iter += 1
    
    print ("Iteration {}".format(iter))
    
    train_x = val_pred_df_x[train_index]
    train_y = val_pred_df_y[train_index]
    val_x = val_pred_df_x[test_index]
    val_y = val_pred_df_y[test_index]
    
    #print (train_x.shape, train_y.shape, val_x.shape, val_y.shape)
    model = get_simple_ensemble_model()
    
    #model.load_weights("weights1.hdf5")
    early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=0, mode='max', baseline=None, restore_best_weights=False)
    lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.000001)
    checkpointer = ModelCheckpoint(monitor='val_acc',filepath='../working/weights_lstm_ensemble_fl_{}.hdf5'.format(iter), mode='max',verbose=1, save_best_only=True)
    
    history = model.fit(train_x, train_y, epochs = n_epochs, batch_size=batch_size, verbose = 1, validation_data=(val_x,val_y), callbacks=[early,lr,checkpointer])
    all_history[iter] = history
    
    break

Iteration 1
Train on 640 samples, validate on 160 samples
Epoch 1/50

Epoch 00001: val_acc improved from -inf to 0.92776, saving model to ../working/weights_lstm_ensemble_1.hdf5
Epoch 2/50

Epoch 00002: val_acc improved from 0.92776 to 0.95687, saving model to ../working/weights_lstm_ensemble_1.hdf5
Epoch 3/50

Epoch 00003: val_acc improved from 0.95687 to 0.96145, saving model to ../working/weights_lstm_ensemble_1.hdf5
Epoch 4/50

Epoch 00004: val_acc improved from 0.96145 to 0.96313, saving model to ../working/weights_lstm_ensemble_1.hdf5
Epoch 5/50

Epoch 00005: val_acc improved from 0.96313 to 0.96398, saving model to ../working/weights_lstm_ensemble_1.hdf5
Epoch 6/50

Epoch 00006: val_acc improved from 0.96398 to 0.96464, saving model to ../working/weights_lstm_ensemble_1.hdf5
Epoch 7/50

Epoch 00007: val_acc improved from 0.96464 to 0.96520, saving model to ../working/weights_lstm_ensemble_1.hdf5
Epoch 8/50

Epoch 00008: val_acc improved from 0.96520 to 0.96559, saving model to .


Epoch 00037: val_acc improved from 0.96881 to 0.96889, saving model to ../working/weights_lstm_ensemble_1.hdf5
Epoch 38/50

Epoch 00038: val_acc did not improve from 0.96889
Epoch 39/50

Epoch 00039: val_acc did not improve from 0.96889
Epoch 40/50

Epoch 00040: val_acc did not improve from 0.96889
Epoch 41/50

Epoch 00041: val_acc improved from 0.96889 to 0.96893, saving model to ../working/weights_lstm_ensemble_1.hdf5
Epoch 42/50

Epoch 00042: val_acc did not improve from 0.96893
Epoch 43/50

Epoch 00043: val_acc did not improve from 0.96893
Epoch 44/50

Epoch 00044: val_acc did not improve from 0.96893
Epoch 45/50

Epoch 00045: val_acc improved from 0.96893 to 0.96898, saving model to ../working/weights_lstm_ensemble_1.hdf5
Epoch 46/50

Epoch 00046: val_acc did not improve from 0.96898
Epoch 47/50

Epoch 00047: val_acc did not improve from 0.96898
Epoch 48/50

Epoch 00048: val_acc improved from 0.96898 to 0.96907, saving model to ../working/weights_lstm_ensemble_1.hdf5
Epoch 49/50


In [139]:
iter = 0
n_epochs = 100
all_history = {}

for train_index, test_index in KFold(n_splits=5).split(val_pred_df_x,val_pred_df_y):
    iter += 1
    
    print ("Iteration {}".format(iter))
    
    train_x = val_pred_df_x[train_index]
    train_y = val_pred_df_y[train_index]
    val_x = val_pred_df_x[test_index]
    val_y = val_pred_df_y[test_index]
    
    #print (train_x.shape, train_y.shape, val_x.shape, val_y.shape)
    model = get_simple_ensemble_model()
    model.compile(loss='categorical_crossentropy',
              optimizer='nadam',
              metrics=['accuracy'])
    class_weight = {0:1,1:5,2:5,3:4,4:5}
    #model.load_weights("weights1.hdf5")
    early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=0, mode='max', baseline=None, restore_best_weights=False)
    lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.000001)
    checkpointer = ModelCheckpoint(monitor='val_acc',filepath='../working/weights_lstm_ensemble_fl_{}.hdf5'.format(iter), mode='max',verbose=1, save_best_only=True)
    
    history = model.fit(train_x, train_y, epochs = n_epochs, batch_size=batch_size, verbose = 1, validation_data=(val_x,val_y), callbacks=[early,lr,checkpointer])
    all_history[iter] = history
    
    break

Iteration 1
Train on 640 samples, validate on 160 samples
Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.96650, saving model to ../working/weights_lstm_ensemble_fl_1.hdf5
Epoch 2/100

Epoch 00002: val_acc improved from 0.96650 to 0.96774, saving model to ../working/weights_lstm_ensemble_fl_1.hdf5
Epoch 3/100

Epoch 00003: val_acc did not improve from 0.96774
Epoch 4/100

Epoch 00004: val_acc did not improve from 0.96774
Epoch 5/100

Epoch 00005: val_acc improved from 0.96774 to 0.96849, saving model to ../working/weights_lstm_ensemble_fl_1.hdf5
Epoch 6/100

Epoch 00006: val_acc improved from 0.96849 to 0.96879, saving model to ../working/weights_lstm_ensemble_fl_1.hdf5
Epoch 7/100

Epoch 00007: val_acc improved from 0.96879 to 0.96891, saving model to ../working/weights_lstm_ensemble_fl_1.hdf5
Epoch 8/100

Epoch 00008: val_acc did not improve from 0.96891
Epoch 9/100

Epoch 00009: val_acc improved from 0.96891 to 0.96905, saving model to ../working/weights_lstm_ensemble_fl_1

In [159]:
def weighted_categorical_crossentropy(weights):
    """
    A weighted version of keras.objectives.categorical_crossentropy
    
    Variables:
        weights: numpy array of shape (C,) where C is the number of classes
    
    Usage:
        weights = np.array([0.5,2,10]) # Class one at 0.5, class 2 twice the normal weights, class 3 10x.
        loss = weighted_categorical_crossentropy(weights)
        model.compile(loss=loss,optimizer='adam')
    """
    
    weights = K.variable(weights)
        
    def loss(y_true, y_pred):
        # scale predictions so that the class probas of each sample sum to 1
        y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
        # clip to prevent NaN's and Inf's
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        # calc
        loss = y_true * K.log(y_pred) * weights
        loss = -K.sum(loss, -1)
        return loss
    
    return loss

In [160]:
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced',
                                                 np.unique(train_data.label),
                                                 train_data.label)
class_weights

array([0.38041206, 1.68988985, 1.67499086, 1.68537174, 1.69733769])

In [163]:
iter = 0
n_epochs = 100
all_history = {}

for train_index, test_index in KFold(n_splits=5).split(val_pred_df_x,val_pred_df_y):
    iter += 1
    
    print ("Iteration {}".format(iter))
    
    train_x = val_pred_df_x[train_index]
    train_y = val_pred_df_y[train_index]
    val_x = val_pred_df_x[test_index]
    val_y = val_pred_df_y[test_index]
    
    #print (train_x.shape, train_y.shape, val_x.shape, val_y.shape)
    model = get_simple_ensemble_model()
    #model.compile(loss=weighted_categorical_crossentropy(class_weights),
    #          optimizer="nadam",
    #          metrics=['accuracy'])
    #model.load_weights("weights1.hdf5")
    early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=0, mode='max', baseline=None, restore_best_weights=False)
    lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.000001)
    checkpointer = ModelCheckpoint(monitor='val_acc',filepath='../working/weights_lstm_ensemble_fl2_{}.hdf5'.format(iter), mode='max',verbose=1, save_best_only=True)
    
    history = model.fit(train_x, train_y, epochs = n_epochs, batch_size=batch_size, verbose = 1, validation_data=(val_x,val_y), callbacks=[early,lr,checkpointer])
    all_history[iter] = history
    
    break

Iteration 1
Train on 640 samples, validate on 160 samples
Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.96884, saving model to ../working/weights_lstm_ensemble_fl2_1.hdf5
Epoch 2/100

Epoch 00002: val_acc improved from 0.96884 to 0.96890, saving model to ../working/weights_lstm_ensemble_fl2_1.hdf5
Epoch 3/100

Epoch 00003: val_acc improved from 0.96890 to 0.96893, saving model to ../working/weights_lstm_ensemble_fl2_1.hdf5
Epoch 4/100

Epoch 00004: val_acc improved from 0.96893 to 0.96893, saving model to ../working/weights_lstm_ensemble_fl2_1.hdf5
Epoch 5/100

Epoch 00005: val_acc improved from 0.96893 to 0.96894, saving model to ../working/weights_lstm_ensemble_fl2_1.hdf5
Epoch 6/100

Epoch 00006: val_acc did not improve from 0.96894
Epoch 7/100

Epoch 00007: val_acc did not improve from 0.96894
Epoch 8/100

Epoch 00008: val_acc did not improve from 0.96894
Epoch 9/100

Epoch 00009: val_acc did not improve from 0.96894
Epoch 10/100

Epoch 00010: val_acc did not improve fr

KeyboardInterrupt: 

In [142]:
submission_ensemble = submission_data[['unique_id','label']].copy()
test_pred = model.predict(submission_data[traincols2].values.reshape(submission_data.well_id.nunique(),1100,len(traincols2))).argmax(axis=-1)

In [143]:
test_pred.shape

(2000, 1100)

In [144]:
submission_ensemble['label'] = test_pred.reshape(submission_data.well_id.nunique()*1100)
submission_ensemble.head(10)

Unnamed: 0,unique_id,label
0,CAX_0,0
1,CAX_1,0
2,CAX_2,0
3,CAX_3,0
4,CAX_4,0
5,CAX_5,0
6,CAX_6,0
7,CAX_7,0
8,CAX_8,0
9,CAX_9,0


In [146]:
submission_ensemble.to_csv("../data/submission_model15_dense_ensemble.csv",index=False)

In [164]:
submission_ensemble.label.value_counts(normalize=True)

0    0.528775
1    0.120071
4    0.119475
2    0.117946
3    0.113733
Name: label, dtype: float64