In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import statsmodels.api as sm
import torch
import gc
import torch.nn as nn
from sklearn.model_selection import StratifiedKFold
import seaborn as sns

from tqdm import tqdm
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

  import pandas.util.testing as tm


# Import Data

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
train = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/train.csv")
test = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/test.csv")
ss = pd.read_csv('/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/submission.csv')

In [6]:
train_df = train.drop(['id','f2','f35','f44','target'], axis = 1 )
test_df = test.drop(['id','f2','f35','f44'], axis = 1)

# EDA-1 세제곱근 씌워서 파생 변수 창출

In [7]:
train.isnull().sum()[train.isnull().sum() != 0]

Series([], dtype: int64)

In [8]:
features = test_df.columns.tolist()
len(features)

97

In [9]:
for col in tqdm(features):
    train_df[col+'_bin'] = train_df[col].apply(lambda x: 1 if np.cbrt(x)>0 else 0)
    test_df[col+'_bin'] = test_df[col].apply(lambda x: 1 if np.cbrt(x)>0 else 0)

print(f"train_df: {train_df.shape} \ntest_df: {test_df.shape}")
train_df.head()

100%|██████████| 97/97 [02:30<00:00,  1.55s/it]

train_df: (600000, 194) 
test_df: (540000, 194)





Unnamed: 0,f0,f1,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f36,f37,f38,f39,f40,f41,...,f60_bin,f61_bin,f62_bin,f63_bin,f64_bin,f65_bin,f66_bin,f67_bin,f68_bin,f69_bin,f70_bin,f71_bin,f72_bin,f73_bin,f74_bin,f75_bin,f76_bin,f77_bin,f78_bin,f79_bin,f80_bin,f81_bin,f82_bin,f83_bin,f84_bin,f85_bin,f86_bin,f87_bin,f88_bin,f89_bin,f90_bin,f91_bin,f92_bin,f93_bin,f94_bin,f95_bin,f96_bin,f97_bin,f98_bin,f99_bin
0,0.106643,3.59437,3.18428,0.081971,1.18859,3.73238,2.26627,2.09959,0.01233,1.60719,-0.318058,0.560137,2.80688,1.35114,2.53593,0.197527,0.676494,1.98979,-3.84245,0.03738,0.230322,3.33055,0.009397,0.144738,3.05131,1.30362,0.033225,-0.018284,2.74821,-0.009294,-0.036271,-0.049871,0.019484,3.89846,1.13802,3.36688,4.94446,-0.105772,2.11345,3.45223,...,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1
1,0.125021,1.67336,3.37825,0.0994,5.09366,1.27562,-0.471318,4.54594,0.037706,0.331749,0.325091,0.06204,2.26215,4.33943,-0.224999,0.233586,3.38128,1.90299,0.067874,-0.051268,0.006135,2.60444,0.103441,0.067638,4.75362,1.85552,-0.181834,0.008359,3.16634,0.01185,0.022292,0.06932,0.117109,0.315276,1.67227,-0.409067,4.95475,0.092358,2.60318,1.95469,...,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1
2,0.03633,1.49747,2.19435,0.026914,3.12694,5.05687,3.84946,1.80187,0.056995,0.328684,2.96881,0.105244,2.06949,5.30986,1.35479,-0.262018,1.37908,1.48091,0.020542,-0.008806,0.109348,1.68365,0.03818,0.123716,1.11248,3.57166,0.120601,0.082069,2.23352,0.00227,0.045182,0.014405,0.011599,-0.502849,1.4175,1.07135,3.22296,2.12203,3.08216,0.637555,...,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1
3,-0.014077,0.246,1.89064,0.006948,1.53112,2.698,4.51733,4.50332,0.123494,1.00268,4.8696,0.058411,2.49785,1.23843,2.34836,0.175475,1.60889,2.02881,0.042086,0.005141,0.076506,1.65122,0.111813,0.121641,0.58912,4.23692,-0.032843,0.058168,0.712927,0.097465,0.072744,0.000324,0.063362,4.06382,0.576572,2.02621,2.96843,1.08567,1.71088,1.37182,...,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1
4,-0.003259,3.71542,2.14772,0.018284,2.09859,4.15492,-0.038236,3.37145,0.034166,0.711483,0.769988,0.057555,0.957257,3.71145,5.46435,0.287104,2.61695,1.38403,0.074883,-0.010543,0.109121,2.27602,0.008023,0.045236,4.35954,5.07562,-0.009376,0.528966,4.05335,0.02,0.106828,0.051307,0.045939,3.40246,1.63596,0.047029,4.01771,0.155748,5.28998,4.11892,...,1,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1


In [16]:
train_df = pd.concat([train_df, train['target']],axis = 1)

In [17]:
features = test_df.columns.tolist()
print(f"Num features: {len(features)}")

Num features: 194


In [18]:
train_df[features] = train_df[features].astype('float32')
test_df[features] = test_df[features].astype('float32')
print(f"train_df: {train_df.shape} \ntest_df: {test_df.shape}")

train_df: (600000, 195) 
test_df: (540000, 194)


# Model1-DNN

In [12]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import Input, BatchNormalization
from tensorflow.keras.layers import Dense, Dropout, Multiply

np.random.seed(42)
tf.random.set_seed(42)

In [13]:
def plot_confusion_matrix(cm, classes):

    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion matrix', fontweight='bold', pad=15)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=0)
    plt.yticks(tick_marks, classes)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], 'd'),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label', fontweight='bold')
    plt.xlabel('Predicted label', fontweight='bold')
    plt.tight_layout()

In [14]:
def dnn_model1():
    
    x_input = Input(shape=(len(features),))
    
    x1 = Dense(units=384, activation='swish')(x_input)
    x1 = BatchNormalization()(x1)
    x2 = Dropout(rate=0.45)(x1)
    
    x2 = Dense(units=192, activation='swish')(x2)
    x2 = BatchNormalization()(x2)
    x3 = Dropout(rate=0.35)(x2)
    
    x3 = Dense(units=96, activation='swish')(x3)
    x3 = BatchNormalization()(x3)
    x3 = Dropout(rate=0.25)(x3)
    
    x4 = Dense(units=192, activation='swish')(x3)
    x4 = BatchNormalization()(x4)
    x4 = Multiply()([x2, x4])
    x4 = Dropout(rate=0.35)(x4)
    
    x5 = Dense(units=384, activation='swish')(x4)
    x5 = BatchNormalization()(x5)
    x5 = Multiply()([x1, x5])
    x5 = Dropout(rate=0.45)(x5)
    
    x = Concatenate()([x3, x5])
    x = Dense(units=128, activation='swish')(x)
    x = BatchNormalization()(x)
    x = Dropout(rate=0.25)(x)
    
    x_output = Dense(units=1, activation='sigmoid')(x)

    model = Model(inputs=x_input, outputs=x_output, 
                  name='DNN_Model')
    return model

model1 = dnn_model1()
model1.summary()

Model: "DNN_Model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 194)]        0           []                               
                                                                                                  
 dense (Dense)                  (None, 384)          74880       ['input_1[0][0]']                
                                                                                                  
 batch_normalization (BatchNorm  (None, 384)         1536        ['dense[0][0]']                  
 alization)                                                                                       
                                                                                                  
 dropout (Dropout)              (None, 384)          0           ['batch_normalization[0][

In [27]:
def dnn_model2():
    
    x_input = Input(shape=(len(features),))
    
    x1 = Dense(units=196, activation='swish')(x_input)
    x1 = BatchNormalization()(x1)
    x2 = Dropout(rate=0.45)(x1)
    
    x2 = Dense(units=96, activation='swish')(x2)
    x2 = BatchNormalization()(x2)
    x3 = Dropout(rate=0.35)(x2)
    
    x3 = Dense(units=48, activation='swish')(x3)
    x3 = BatchNormalization()(x3)
    x3 = Dropout(rate=0.25)(x3)
    
    x4 = Dense(units=96, activation='swish')(x3)
    x4 = BatchNormalization()(x4)
    x4 = Multiply()([x2, x4])
    x4 = Dropout(rate=0.35)(x4)
    
    x5 = Dense(units=196, activation='swish')(x4)
    x5 = BatchNormalization()(x5)
    x5 = Multiply()([x1, x5])
    x5 = Dropout(rate=0.45)(x5)
    
    x = Concatenate()([x3, x5])
    x = Dense(units=64, activation='swish')(x)
    x = BatchNormalization()(x)
    x = Dropout(rate=0.25)(x)
    
    x_output = Dense(units=1, activation='sigmoid')(x)

    model = Model(inputs=x_input, outputs=x_output, 
                  name='DNN_Model')
    return model

model2 = dnn_model2()
model2.summary()

Model: "DNN_Model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_15 (InputLayer)          [(None, 194)]        0           []                               
                                                                                                  
 dense_96 (Dense)               (None, 196)          38220       ['input_15[0][0]']               
                                                                                                  
 batch_normalization_83 (BatchN  (None, 196)         784         ['dense_96[0][0]']               
 ormalization)                                                                                    
                                                                                                  
 dropout_82 (Dropout)           (None, 196)          0           ['batch_normalization_83[

In [30]:
def dnn_model3():
    
    x_input = Input(shape=(len(features),))
    
    x1 = Dense(units=98, activation='swish')(x_input)
    x1 = BatchNormalization()(x1)
    x2 = Dropout(rate=0.45)(x1)
    
    x2 = Dense(units=48, activation='swish')(x2)
    x2 = BatchNormalization()(x2)
    x3 = Dropout(rate=0.35)(x2)
    
    x3 = Dense(units=24, activation='swish')(x3)
    x3 = BatchNormalization()(x3)
    x3 = Dropout(rate=0.25)(x3)
    
    x4 = Dense(units=48, activation='swish')(x3)
    x4 = BatchNormalization()(x4)
    x4 = Multiply()([x2, x4])
    x4 = Dropout(rate=0.35)(x4)
    
    x5 = Dense(units=98, activation='swish')(x4)
    x5 = BatchNormalization()(x5)
    x5 = Multiply()([x1, x5])
    x5 = Dropout(rate=0.45)(x5)
    
    x = Concatenate()([x3, x5])
    x = Dense(units=32, activation='swish')(x)
    x = BatchNormalization()(x)
    x = Dropout(rate=0.25)(x)
    
    x_output = Dense(units=1, activation='sigmoid')(x)

    model = Model(inputs=x_input, outputs=x_output, 
                  name='DNN_Model')
    return model

model3 = dnn_model3()
model3.summary()

Model: "DNN_Model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_18 (InputLayer)          [(None, 194)]        0           []                               
                                                                                                  
 dense_115 (Dense)              (None, 98)           19110       ['input_18[0][0]']               
                                                                                                  
 batch_normalization_100 (Batch  (None, 98)          392         ['dense_115[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 dropout_98 (Dropout)           (None, 98)           0           ['batch_normalization_100

In [40]:
def dnn_model4():
    
    x_input = Input(shape=(len(features),))
    
    x1 = Dense(units=49, activation='swish')(x_input)
    x1 = BatchNormalization()(x1)
    x2 = Dropout(rate=0.45)(x1)
    
    x2 = Dense(units=24, activation='swish')(x2)
    x2 = BatchNormalization()(x2)
    x3 = Dropout(rate=0.35)(x2)
    
    x3 = Dense(units=12, activation='swish')(x3)
    x3 = BatchNormalization()(x3)
    x3 = Dropout(rate=0.25)(x3)
    
    x4 = Dense(units=24, activation='swish')(x3)
    x4 = BatchNormalization()(x4)
    x4 = Multiply()([x2, x4])
    x4 = Dropout(rate=0.35)(x4)
    
    x5 = Dense(units=49, activation='swish')(x4)
    x5 = BatchNormalization()(x5)
    x5 = Multiply()([x1, x5])
    x5 = Dropout(rate=0.45)(x5)
    
    x = Concatenate()([x3, x5])
    x = Dense(units=16, activation='swish')(x)
    x = BatchNormalization()(x)
    x = Dropout(rate=0.25)(x)
    
    x_output = Dense(units=1, activation='sigmoid')(x)

    model = Model(inputs=x_input, outputs=x_output, 
                  name='DNN_Model')
    return model

model4 = dnn_model4()
model4.summary()

Model: "DNN_Model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_39 (InputLayer)          [(None, 194)]        0           []                               
                                                                                                  
 dense_262 (Dense)              (None, 49)           9555        ['input_39[0][0]']               
                                                                                                  
 batch_normalization_226 (Batch  (None, 49)          196         ['dense_262[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 dropout_224 (Dropout)          (None, 49)           0           ['batch_normalization_226

**dnn_Model1**

In [19]:
### dnn_model1

FOLD = 5
VERBOSE = 0
SEEDS = [2021, 2025]
BATCH_SIZE = 512

counter = 0
oof_score = 0
y_pred_final_dnn = np.zeros((test_df.shape[0], 1))
y_pred_meta_dnn = np.zeros((train_df.shape[0], 1))


for sidx, seed in enumerate(SEEDS):
    seed_score = 0
    
    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(train_df[features], train_df['target'])):
        counter += 1

        train_x, train_y = train_df[features].iloc[train], train_df['target'].iloc[train]
        val_x, val_y = train_df[features].iloc[val], train_df['target'].iloc[val]

        model1 = dnn_model1()
        model1.compile(optimizer=Adam(learning_rate=1e-2), 
                      loss="binary_crossentropy", 
                      metrics=['AUC'])

        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.25, 
                               patience=4, verbose=VERBOSE)
        
        chk_point = ModelCheckpoint(f'./Keras_DNN_Model_{counter}C.h5', 
                                    monitor='val_loss', verbose=VERBOSE, 
                                    save_best_only=True, mode='min')

        es = EarlyStopping(monitor="val_loss", patience=15, 
                           verbose=VERBOSE, mode="min", 
                           restore_best_weights=True)
        
        model1.fit(train_x, train_y, 
                  validation_data=(val_x, val_y), 
                  epochs=300,
                  verbose=VERBOSE,
                  batch_size=BATCH_SIZE, 
                  callbacks=[lr, chk_point, es])
        
        model1 = load_model(f'./Keras_DNN_Model_{counter}C.h5')
        
        y_pred = model1.predict(val_x, batch_size=BATCH_SIZE)
        y_pred_meta_dnn[val] += y_pred
        y_pred_final_dnn += model1.predict(test_df, batch_size=BATCH_SIZE)
        
        score = roc_auc_score(val_y, y_pred)
        oof_score += score
        seed_score += score
        print("\nSeed-{} | Fold-{} | OOF Score: {}\n".format(seed, idx, score))
        
        del model1, y_pred
        del train_x, train_y
        del val_x, val_y
        gc.collect()
    
    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


y_pred_meta_dnn = y_pred_meta_dnn / float(len(SEEDS))
y_pred_final_dnn = y_pred_final_dnn / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-0 | OOF Score: 0.7527734954240652



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-1 | OOF Score: 0.7566354546198236



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-2 | OOF Score: 0.7571359217485271



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-3 | OOF Score: 0.7559652640941256



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-4 | OOF Score: 0.7568164389723815


Seed: 2021 | Aggregate OOF Score: 0.7558653149717847




  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-0 | OOF Score: 0.7532582805660633



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-1 | OOF Score: 0.7561136274622815



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-2 | OOF Score: 0.7574400717789872



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-3 | OOF Score: 0.7544944849076469



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-4 | OOF Score: 0.7572055679575798


Seed: 2025 | Aggregate OOF Score: 0.7557024065345116


Aggregate OOF Score: 0.7557838607531482


In [21]:
y_pred_meta = np.mean(y_pred_meta_dnn, axis=1)
y_pred = (y_pred_meta>0.5).astype(int)
print(classification_report(train_df['target'], y_pred))

              precision    recall  f1-score   support

           0       0.74      0.73      0.74    296394
           1       0.74      0.75      0.75    303606

    accuracy                           0.74    600000
   macro avg       0.74      0.74      0.74    600000
weighted avg       0.74      0.74      0.74    600000



In [22]:
cnf_matrix = confusion_matrix(train_df['target'], y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
plt.figure(figsize=(12, 5))

<Figure size 864x360 with 0 Axes>

<Figure size 864x360 with 0 Axes>

In [23]:
submit_df = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/sample_submission.csv")
submit_df['target'] = y_pred_final_dnn.ravel()
submit_df.to_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/DNN1_cbrt_Submission.csv", index=False)
submit_df.head()

Unnamed: 0,id,target
0,600000,0.749574
1,600001,0.714452
2,600002,0.765703
3,600003,0.33145
4,600004,0.675498


# Model2-DNN

In [32]:
FOLD = 5
VERBOSE = 0
SEEDS = [2021, 2025]
BATCH_SIZE = 512

counter = 0
oof_score = 0
y_pred_final_dnn = np.zeros((test_df.shape[0], 1))
y_pred_meta_dnn = np.zeros((train_df.shape[0], 1))


for sidx, seed in enumerate(SEEDS):
    seed_score = 0
    
    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(train_df[features], train_df['target'])):
        counter += 1

        train_x, train_y = train_df[features].iloc[train], train_df['target'].iloc[train]
        val_x, val_y = train_df[features].iloc[val], train_df['target'].iloc[val]

        model2 = dnn_model2()
        model2.compile(optimizer=Adam(learning_rate=1e-2), 
                      loss="binary_crossentropy", 
                      metrics=['AUC'])

        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.25, 
                               patience=4, verbose=VERBOSE)
        
        chk_point = ModelCheckpoint(f'./Keras_DNN_Model_{counter}C.h5', 
                                    monitor='val_loss', verbose=VERBOSE, 
                                    save_best_only=True, mode='min')

        es = EarlyStopping(monitor="val_loss", patience=15, 
                           verbose=VERBOSE, mode="min", 
                           restore_best_weights=True)
        
        model2.fit(train_x, train_y, 
                  validation_data=(val_x, val_y), 
                  epochs=300,
                  verbose=VERBOSE,
                  batch_size=BATCH_SIZE, 
                  callbacks=[lr, chk_point, es])
        
        model2 = load_model(f'./Keras_DNN_Model_{counter}C.h5')
        
        y_pred = model2.predict(val_x, batch_size=BATCH_SIZE)
        y_pred_meta_dnn[val] += y_pred
        y_pred_final_dnn += model2.predict(test_df, batch_size=BATCH_SIZE)
        
        score = roc_auc_score(val_y, y_pred)
        oof_score += score
        seed_score += score
        print("\nSeed-{} | Fold-{} | OOF Score: {}\n".format(seed, idx, score))
        
        del model2, y_pred
        del train_x, train_y
        del val_x, val_y
        gc.collect()
    
    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


y_pred_meta_dnn2 = y_pred_meta_dnn / float(len(SEEDS))
y_pred_final_dnn2 = y_pred_final_dnn / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-0 | OOF Score: 0.7529289020314891



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-1 | OOF Score: 0.7570648995484572



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-2 | OOF Score: 0.7573637792345448



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-3 | OOF Score: 0.7557084950721631



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-4 | OOF Score: 0.7569301808589997


Seed: 2021 | Aggregate OOF Score: 0.7559992513491307




  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-0 | OOF Score: 0.7533209476707662



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-1 | OOF Score: 0.7556951662030261



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-2 | OOF Score: 0.7577676518594196



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-3 | OOF Score: 0.7545757117757181



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-4 | OOF Score: 0.757107022993715


Seed: 2025 | Aggregate OOF Score: 0.755693300100529


Aggregate OOF Score: 0.7558462757248299


In [33]:
y_pred_meta = np.mean(y_pred_meta_dnn2, axis=1)
y_pred = (y_pred_meta>0.5).astype(int)
print(classification_report(train_df['target'], y_pred))

              precision    recall  f1-score   support

           0       0.74      0.73      0.74    296394
           1       0.74      0.75      0.75    303606

    accuracy                           0.74    600000
   macro avg       0.74      0.74      0.74    600000
weighted avg       0.74      0.74      0.74    600000



In [34]:
cnf_matrix = confusion_matrix(train_df['target'], y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
plt.figure(figsize=(12, 5))

<Figure size 864x360 with 0 Axes>

<Figure size 864x360 with 0 Axes>

In [35]:
submit_df2 = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/sample_submission.csv")
submit_df2['target'] = y_pred_final_dnn2.ravel()
submit_df2.to_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/DNN_model2_Submission.csv", index=False)
submit_df2.head()

Unnamed: 0,id,target
0,600000,0.751021
1,600001,0.717366
2,600002,0.764463
3,600003,0.332137
4,600004,0.67735


# Model3-DNN

In [36]:
FOLD = 5
VERBOSE = 0
SEEDS = [2021, 2025]
BATCH_SIZE = 512

counter = 0
oof_score = 0
y_pred_final_dnn = np.zeros((test_df.shape[0], 1))
y_pred_meta_dnn = np.zeros((train_df.shape[0], 1))


for sidx, seed in enumerate(SEEDS):
    seed_score = 0
    
    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(train_df[features], train_df['target'])):
        counter += 1

        train_x, train_y = train_df[features].iloc[train], train_df['target'].iloc[train]
        val_x, val_y = train_df[features].iloc[val], train_df['target'].iloc[val]

        model3 = dnn_model3()
        model3.compile(optimizer=Adam(learning_rate=1e-2), 
                      loss="binary_crossentropy", 
                      metrics=['AUC'])

        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.25, 
                               patience=4, verbose=VERBOSE)
        
        chk_point = ModelCheckpoint(f'./Keras_DNN_Model_{counter}C.h5', 
                                    monitor='val_loss', verbose=VERBOSE, 
                                    save_best_only=True, mode='min')

        es = EarlyStopping(monitor="val_loss", patience=15, 
                           verbose=VERBOSE, mode="min", 
                           restore_best_weights=True)
        
        model3.fit(train_x, train_y, 
                  validation_data=(val_x, val_y), 
                  epochs=300,
                  verbose=VERBOSE,
                  batch_size=BATCH_SIZE, 
                  callbacks=[lr, chk_point, es])
        
        model3 = load_model(f'./Keras_DNN_Model_{counter}C.h5')
        
        y_pred = model3.predict(val_x, batch_size=BATCH_SIZE)
        y_pred_meta_dnn[val] += y_pred
        y_pred_final_dnn += model3.predict(test_df, batch_size=BATCH_SIZE)
        
        score = roc_auc_score(val_y, y_pred)
        oof_score += score
        seed_score += score
        print("\nSeed-{} | Fold-{} | OOF Score: {}\n".format(seed, idx, score))
        
        del model3, y_pred
        del train_x, train_y
        del val_x, val_y
        gc.collect()
    
    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


y_pred_meta_dnn3 = y_pred_meta_dnn / float(len(SEEDS))
y_pred_final_dnn3 = y_pred_final_dnn / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-0 | OOF Score: 0.7528662181188037



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-1 | OOF Score: 0.7572968756847647



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-2 | OOF Score: 0.7576959991516374



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-3 | OOF Score: 0.7556736247591024



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-4 | OOF Score: 0.7569505982599065


Seed: 2021 | Aggregate OOF Score: 0.756096663194843




  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-0 | OOF Score: 0.7537682596238475



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-1 | OOF Score: 0.7559478286597773



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-2 | OOF Score: 0.7574156629765715



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-3 | OOF Score: 0.7546679919065502



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-4 | OOF Score: 0.7573783840926593


Seed: 2025 | Aggregate OOF Score: 0.7558356254518811


Aggregate OOF Score: 0.7559661443233621


In [37]:
y_pred_meta = np.mean(y_pred_meta_dnn3, axis=1)
y_pred = (y_pred_meta>0.5).astype(int)
print(classification_report(train_df['target'], y_pred))

              precision    recall  f1-score   support

           0       0.74      0.73      0.74    296394
           1       0.74      0.75      0.75    303606

    accuracy                           0.74    600000
   macro avg       0.74      0.74      0.74    600000
weighted avg       0.74      0.74      0.74    600000



In [38]:
cnf_matrix = confusion_matrix(train_df['target'], y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
plt.figure(figsize=(12, 5))

<Figure size 864x360 with 0 Axes>

<Figure size 864x360 with 0 Axes>

In [39]:
submit_df3 = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/sample_submission.csv")
submit_df3['target'] = y_pred_final_dnn3.ravel()
submit_df3.to_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/DNN_model3_Submission.csv", index=False)
submit_df3.head()

Unnamed: 0,id,target
0,600000,0.745709
1,600001,0.721099
2,600002,0.75918
3,600003,0.32733
4,600004,0.682954


Model4-DNN

In [41]:
FOLD = 5
VERBOSE = 0
SEEDS = [2021, 2025]
BATCH_SIZE = 512

counter = 0
oof_score = 0
y_pred_final_dnn = np.zeros((test_df.shape[0], 1))
y_pred_meta_dnn = np.zeros((train_df.shape[0], 1))


for sidx, seed in enumerate(SEEDS):
    seed_score = 0
    
    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(train_df[features], train_df['target'])):
        counter += 1

        train_x, train_y = train_df[features].iloc[train], train_df['target'].iloc[train]
        val_x, val_y = train_df[features].iloc[val], train_df['target'].iloc[val]

        model4 = dnn_model4()
        model4.compile(optimizer=Adam(learning_rate=1e-2), 
                      loss="binary_crossentropy", 
                      metrics=['AUC'])

        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.25, 
                               patience=4, verbose=VERBOSE)
        
        chk_point = ModelCheckpoint(f'./Keras_DNN_Model_{counter}C.h5', 
                                    monitor='val_loss', verbose=VERBOSE, 
                                    save_best_only=True, mode='min')

        es = EarlyStopping(monitor="val_loss", patience=15, 
                           verbose=VERBOSE, mode="min", 
                           restore_best_weights=True)
        
        model4.fit(train_x, train_y, 
                  validation_data=(val_x, val_y), 
                  epochs=300,
                  verbose=VERBOSE,
                  batch_size=BATCH_SIZE, 
                  callbacks=[lr, chk_point, es])
        
        model4 = load_model(f'./Keras_DNN_Model_{counter}C.h5')
        
        y_pred = model4.predict(val_x, batch_size=BATCH_SIZE)
        y_pred_meta_dnn[val] += y_pred
        y_pred_final_dnn += model4.predict(test_df, batch_size=BATCH_SIZE)
        
        score = roc_auc_score(val_y, y_pred)
        oof_score += score
        seed_score += score
        print("\nSeed-{} | Fold-{} | OOF Score: {}\n".format(seed, idx, score))
        
        del model4, y_pred
        del train_x, train_y
        del val_x, val_y
        gc.collect()
    
    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


y_pred_meta_dnn4 = y_pred_meta_dnn / float(len(SEEDS))
y_pred_final_dnn4 = y_pred_final_dnn / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-0 | OOF Score: 0.7526248195107775



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-1 | OOF Score: 0.7574569074045006



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-2 | OOF Score: 0.7572408088386965



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-3 | OOF Score: 0.7553429659007602



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-4 | OOF Score: 0.7568985223859287


Seed: 2021 | Aggregate OOF Score: 0.7559128048081327




  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-0 | OOF Score: 0.7540791829934906



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-1 | OOF Score: 0.7559116102353824



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-2 | OOF Score: 0.7574831538333812



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-3 | OOF Score: 0.7544746577390427



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-4 | OOF Score: 0.7571025741550739


Seed: 2025 | Aggregate OOF Score: 0.7558102357912742


Aggregate OOF Score: 0.7558615202997034


In [42]:
y_pred_meta = np.mean(y_pred_meta_dnn4, axis=1)
y_pred = (y_pred_meta>0.5).astype(int)
print(classification_report(train_df['target'], y_pred))

              precision    recall  f1-score   support

           0       0.74      0.73      0.74    296394
           1       0.74      0.75      0.75    303606

    accuracy                           0.74    600000
   macro avg       0.74      0.74      0.74    600000
weighted avg       0.74      0.74      0.74    600000



In [43]:
cnf_matrix = confusion_matrix(train_df['target'], y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
plt.figure(figsize=(12, 5))

<Figure size 864x360 with 0 Axes>

<Figure size 864x360 with 0 Axes>

In [44]:
submit_df4 = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/sample_submission.csv")
submit_df4['target'] = y_pred_final_dnn4.ravel()
submit_df4.to_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/DNN_model4_Submission.csv", index=False)
submit_df4.head()

Unnamed: 0,id,target
0,600000,0.74329
1,600001,0.724928
2,600002,0.753394
3,600003,0.32153
4,600004,0.690102


# Model5-DNN

In [None]:
FOLD = 5
VERBOSE = 0
SEEDS = [2021, 2025]
BATCH_SIZE = 512

counter = 0
oof_score = 0
y_pred_final_dnn = np.zeros((test_df.shape[0], 1))
y_pred_meta_dnn = np.zeros((train_df.shape[0], 1))


for sidx, seed in enumerate(SEEDS):
    seed_score = 0
    
    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(train_df[features], train_df['target'])):
        counter += 1

        train_x, train_y = train_df[features].iloc[train], train_df['target'].iloc[train]
        val_x, val_y = train_df[features].iloc[val], train_df['target'].iloc[val]

        model5 = dnn_model()
        model5.compile(optimizer=Adam(learning_rate=1e-2), 
                      loss="binary_crossentropy", 
                      metrics=['AUC'])

        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.25, 
                               patience=4, verbose=VERBOSE)
        
        chk_point = ModelCheckpoint(f'./Keras_DNN_Model_{counter}C.h5', 
                                    monitor='val_loss', verbose=VERBOSE, 
                                    save_best_only=True, mode='min')

        es = EarlyStopping(monitor="val_loss", patience=15, 
                           verbose=VERBOSE, mode="min", 
                           restore_best_weights=True)
        
        model5.fit(train_x, train_y, 
                  validation_data=(val_x, val_y), 
                  epochs=300,
                  verbose=VERBOSE,
                  batch_size=BATCH_SIZE, 
                  callbacks=[lr, chk_point, es])
        
        model5 = load_model(f'./Keras_DNN_Model_{counter}C.h5')
        
        y_pred = model5.predict(val_x, batch_size=BATCH_SIZE)
        y_pred_meta_dnn[val] += y_pred
        y_pred_final_dnn += model5.predict(test_df, batch_size=BATCH_SIZE)
        
        score = roc_auc_score(val_y, y_pred)
        oof_score += score
        seed_score += score
        print("\nSeed-{} | Fold-{} | OOF Score: {}\n".format(seed, idx, score))
        
        del model5, y_pred
        del train_x, train_y
        del val_x, val_y
        gc.collect()
    
    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


y_pred_meta_dnn5 = y_pred_meta_dnn / float(len(SEEDS))
y_pred_final_dnn5 = y_pred_final_dnn / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

In [None]:
y_pred_meta = np.mean(y_pred_meta_dnn5, axis=1)
y_pred = (y_pred_meta>0.5).astype(int)
print(classification_report(train_df['target'], y_pred))

In [None]:
cnf_matrix = confusion_matrix(train_df['target'], y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
plt.figure(figsize=(12, 5))

In [None]:
submit_df5 = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/sample_submission.csv")
submit_df5['target'] = y_pred_final_dnn5.ravel()
submit_df5.to_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/DNN_model4_Submission.csv", index=False)
submit_df5.head()

# EDA-6 luca

Credits to the following beautiful notebook by Bex - https://www.kaggle.com/bextuychiev/model-explainability-with-shap-only-guide-u-need/notebook

I have also used the following one from Luca as reference - https://www.kaggle.com/lucamassaron/feature-selection-by-boruta-shap

Following is a good notebook on LOFO - https://www.kaggle.com/frankmollard/lofo-importance-correlations-tps-nov-21

I am doing a simple SelectKBest (30 variables) in this data and then taking common ones from the above methods, to see what variables truly stand out.

Will update if I add another set and once I try mutual information with my engineered variables

In [None]:
train = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/train.csv")
test = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/test.csv")

In [None]:
train_df = train[['f1', 'f10', 'f11', 'f14', 'f15', 'f16', 'f17',
                     'f2', 'f20', 'f21', 'f22','f24','f25', 'f26',
                     'f27', 'f28', 'f3', 'f30', 'f31', 'f32', 'f33',
                     'f34', 'f36', 'f37', 'f4', 'f40', 'f41','f42',
                     'f43', 'f44', 'f45', 'f46', 'f47', 'f48', 'f49',
                     'f5','f50', 'f51', 'f53', 'f54', 'f55', 'f57',
                     'f58', 'f59', 'f60', 'f61', 'f62', 'f64', 'f66',
                     'f67', 'f70', 'f71', 'f76','f77', 'f8', 'f80',
                     'f81','f82', 'f83', 'f87', 'f89', 'f9', 'f90',
                     'f91', 'f93', 'f94', 'f95', 'f96', 'f97', 'f98']]
test_df = test[['f1', 'f10', 'f11', 'f14', 'f15', 'f16', 'f17',
                     'f2', 'f20', 'f21', 'f22','f24','f25', 'f26',
                     'f27', 'f28', 'f3', 'f30', 'f31', 'f32', 'f33',
                     'f34', 'f36', 'f37', 'f4', 'f40', 'f41','f42',
                     'f43', 'f44', 'f45', 'f46', 'f47', 'f48', 'f49',
                     'f5','f50', 'f51', 'f53', 'f54', 'f55', 'f57',
                     'f58', 'f59', 'f60', 'f61', 'f62', 'f64', 'f66',
                     'f67', 'f70', 'f71', 'f76','f77', 'f8', 'f80',
                     'f81','f82', 'f83', 'f87', 'f89', 'f9', 'f90',
                     'f91', 'f93', 'f94', 'f95', 'f96', 'f97', 'f98']]

In [None]:
train_df['mean'] = train_df.mean(axis=1)
train_df['std'] = train_df.std(axis=1)
train_df['var'] = train_df.var(axis=1)
train_df['sum'] = train_df.sum(axis=1)
train_df['max'] = train_df.max(axis=1)
train_df['kurt'] = train_df.kurt(axis=1)
train_df['quantile'] = train_df.quantile(axis=1)


test_df['mean'] = test_df.mean(axis=1)
test_df['std'] = test_df.std(axis=1)
test_df['var'] = test_df.var(axis=1)
test_df['sum'] = test_df.sum(axis=1)
test_df['max'] = test_df.max(axis=1)
test_df['kurt'] = test_df.kurt(axis=1)
test_df['quantile'] = test_df.quantile(axis=1)

In [None]:
train_df = pd.concat([train_df, train['target']],axis = 1)

In [None]:
features = test_df.columns.tolist()
print(f"Num features: {len(features)}")

Num features: 77


In [None]:
train_df[features] = train_df[features].astype('float32')
test_df[features] = test_df[features].astype('float32')
print(f"train_df: {train_df.shape} \ntest_df: {test_df.shape}")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


train_df: (600000, 78) 
test_df: (540000, 77)


# Model6-DNN

In [None]:
FOLD = 5
VERBOSE = 0
SEEDS = [2021, 2025]
BATCH_SIZE = 512

counter = 0
oof_score = 0
y_pred_final_dnn = np.zeros((test_df.shape[0], 1))
y_pred_meta_dnn = np.zeros((train_df.shape[0], 1))


for sidx, seed in enumerate(SEEDS):
    seed_score = 0
    
    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(train_df[features], train_df['target'])):
        counter += 1

        train_x, train_y = train_df[features].iloc[train], train_df['target'].iloc[train]
        val_x, val_y = train_df[features].iloc[val], train_df['target'].iloc[val]

        model6 = dnn_model()
        model6.compile(optimizer=Adam(learning_rate=1e-2), 
                      loss="binary_crossentropy", 
                      metrics=['AUC'])

        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.25, 
                               patience=4, verbose=VERBOSE)
        
        chk_point = ModelCheckpoint(f'./Keras_DNN_Model_{counter}C.h5', 
                                    monitor='val_loss', verbose=VERBOSE, 
                                    save_best_only=True, mode='min')

        es = EarlyStopping(monitor="val_loss", patience=15, 
                           verbose=VERBOSE, mode="min", 
                           restore_best_weights=True)
        
        model6.fit(train_x, train_y, 
                  validation_data=(val_x, val_y), 
                  epochs=300,
                  verbose=VERBOSE,
                  batch_size=BATCH_SIZE, 
                  callbacks=[lr, chk_point, es])
        
        model6 = load_model(f'./Keras_DNN_Model_{counter}C.h5')
        
        y_pred = model6.predict(val_x, batch_size=BATCH_SIZE)
        y_pred_meta_dnn[val] += y_pred
        y_pred_final_dnn += model6.predict(test_df, batch_size=BATCH_SIZE)
        
        score = roc_auc_score(val_y, y_pred)
        oof_score += score
        seed_score += score
        print("\nSeed-{} | Fold-{} | OOF Score: {}\n".format(seed, idx, score))
        
        del model6, y_pred
        del train_x, train_y
        del val_x, val_y
        gc.collect()
    
    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


y_pred_meta_dnn6 = y_pred_meta_dnn / float(len(SEEDS))
y_pred_final_dnn6 = y_pred_final_dnn / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-0 | OOF Score: 0.7424148335192977



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-1 | OOF Score: 0.745809829035371



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-2 | OOF Score: 0.7467455127594718



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-3 | OOF Score: 0.7463032208368398



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-4 | OOF Score: 0.7487927579677902


Seed: 2021 | Aggregate OOF Score: 0.7460132308237541




  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-0 | OOF Score: 0.7438889572165024



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-1 | OOF Score: 0.7452566569349437



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-2 | OOF Score: 0.7468646138465906



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-3 | OOF Score: 0.7456450231540226



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-4 | OOF Score: 0.7483499692681611


Seed: 2025 | Aggregate OOF Score: 0.746001044084044


Aggregate OOF Score: 0.7460071374538991


In [None]:
y_pred_meta = np.mean(y_pred_meta_dnn6, axis=1)
y_pred = (y_pred_meta>0.5).astype(int)
print(classification_report(train_df['target'], y_pred))

              precision    recall  f1-score   support

           0       0.73      0.72      0.72    296394
           1       0.73      0.74      0.73    303606

    accuracy                           0.73    600000
   macro avg       0.73      0.73      0.73    600000
weighted avg       0.73      0.73      0.73    600000



In [None]:
cnf_matrix = confusion_matrix(train_df['target'], y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
plt.figure(figsize=(12, 5))

<Figure size 864x360 with 0 Axes>

<Figure size 864x360 with 0 Axes>

In [None]:
submit_df6 = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/sample_submission.csv")
submit_df6['target'] = y_pred_final_dnn6.ravel()
submit_df6.to_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/DNN_model6_Submission.csv", index=False)
submit_df6.head()

Unnamed: 0,id,target
0,600000,0.742414
1,600001,0.733466
2,600002,0.747426
3,600003,0.606355
4,600004,0.7181


# EDA-7 my_features_f_classif 

In [None]:
train = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/train.csv")
test = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/test.csv")

In [None]:
train_df = train[['f3','f8','f10','f17','f21','f22','f24','f25','f26','f27','f34',
               'f40','f41','f43','f44','f47','f50','f54','f55','f57','f60','f66',
               'f71','f80','f81','f82','f91','f96','f97','f98']]
test_df = test[['f3','f8','f10','f17','f21','f22','f24','f25','f26','f27','f34',
               'f40','f41','f43','f44','f47','f50','f54','f55','f57','f60','f66',
               'f71','f80','f81','f82','f91','f96','f97','f98']]

In [None]:
features = test_df.columns.tolist()
len(features)

30

In [None]:
for col in tqdm(features):
    train_df[col+'_bin'] = train_df[col].apply(lambda x: 1 if np.cbrt(x)>0 else 0)
    test_df[col+'_bin'] = test_df[col].apply(lambda x: 1 if np.cbrt(x)>0 else 0)

print(f"train_df: {train_df.shape} \ntest_df: {test_df.shape}")
train_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
 30%|███       | 9/30 [00:15<00:36,  1.75s/it]


KeyboardInterrupt: ignored

In [None]:
features = test_df.columns.tolist()
print(f"Num features: {len(features)}")

In [None]:
train_df[features] = train_df[features].astype('float32')
test_df[features] = test_df[features].astype('float32')
print(f"train_df: {train_df.shape} \ntest_df: {test_df.shape}")

# Model7-DNN

In [None]:
FOLD = 5
VERBOSE = 0
SEEDS = [2021, 2025]
BATCH_SIZE = 512

counter = 0
oof_score = 0
y_pred_final_dnn = np.zeros((test_df.shape[0], 1))
y_pred_meta_dnn = np.zeros((train_df.shape[0], 1))


for sidx, seed in enumerate(SEEDS):
    seed_score = 0
    
    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(train_df[features], train_df['target'])):
        counter += 1

        train_x, train_y = train_df[features].iloc[train], train_df['target'].iloc[train]
        val_x, val_y = train_df[features].iloc[val], train_df['target'].iloc[val]

        model7 = dnn_model()
        model7.compile(optimizer=Adam(learning_rate=1e-2), 
                      loss="binary_crossentropy", 
                      metrics=['AUC'])

        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.25, 
                               patience=4, verbose=VERBOSE)
        
        chk_point = ModelCheckpoint(f'./Keras_DNN_Model_{counter}C.h5', 
                                    monitor='val_loss', verbose=VERBOSE, 
                                    save_best_only=True, mode='min')

        es = EarlyStopping(monitor="val_loss", patience=15, 
                           verbose=VERBOSE, mode="min", 
                           restore_best_weights=True)
        
        model7.fit(train_x, train_y, 
                  validation_data=(val_x, val_y), 
                  epochs=300,
                  verbose=VERBOSE,
                  batch_size=BATCH_SIZE, 
                  callbacks=[lr, chk_point, es])
        
        model7 = load_model(f'./Keras_DNN_Model_{counter}C.h5')
        
        y_pred = model7.predict(val_x, batch_size=BATCH_SIZE)
        y_pred_meta_dnn[val] += y_pred
        y_pred_final_dnn += model7.predict(test_df, batch_size=BATCH_SIZE)
        
        score = roc_auc_score(val_y, y_pred)
        oof_score += score
        seed_score += score
        print("\nSeed-{} | Fold-{} | OOF Score: {}\n".format(seed, idx, score))
        
        del model7, y_pred
        del train_x, train_y
        del val_x, val_y
        gc.collect()
    
    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


y_pred_meta_dnn7 = y_pred_meta_dnn / float(len(SEEDS))
y_pred_final_dnn7 = y_pred_final_dnn / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

In [None]:
y_pred_meta = np.mean(y_pred_meta_dnn7, axis=1)
y_pred = (y_pred_meta>0.5).astype(int)
print(classification_report(train_df['target'], y_pred))

In [None]:
cnf_matrix = confusion_matrix(train_df['target'], y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
plt.figure(figsize=(12, 5))

In [None]:
submit_df7 = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/sample_submission.csv")
submit_df7['target'] = y_pred_final_dnn5.ravel()
submit_df7.to_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/DNN_model7_Submission.csv", index=False)
submit_df7.head()

#EDA-8 lofo_features

In [None]:
lofo_features = set(['f34','f55','f8','f43','f91','f71','f80','f27','f50','f41','f97','f66','f57',
                'f22','f25','f96','f81','f82','f21','f24','f26','f54','f98','f40','f60','f3','f17',
                'f95','f5','f45'])

# Model8-DNN

#EDA-9 cor_features 

In [None]:
cor_features = set(['f34', 'f55', 'f43', 'f71', 'f80', 'f91', 'f8', 'f27', 'f97', 'f50', 'f41', 'f57',
                    'f25', 'f22', 'f66', 'f96', 'f81', 'f82', 'f21', 'f40', 'f24', 'f60', 'f98', 'f3',
                    'f54', 'f44', 'f26', 'f47', 'f17', 'f10'])

# Model9-DNN

In [None]:
train = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/train.csv")
test = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/test.csv")

In [None]:
df_train = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/train.csv")
df_test = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/test.csv")

In [None]:
df_train = train.drop(['target', 'id'], axis = 1)

In [None]:
df_test = test.drop(['id'],axis = 1)

In [None]:
train_df = train.drop(['target', 'id'], axis = 1)

In [None]:
test_df = test.drop(['id'],axis = 1)

In [None]:
features = test_df.columns.tolist()
len(features)

100

In [None]:
for col in tqdm(features):
    train_df[col+'_bin'] = train_df[col].apply(lambda x: 1 if np.cbrt(x)>0 else 0)
    test_df[col+'_bin'] = test_df[col].apply(lambda x: 1 if np.cbrt(x)>0 else 0)

print(f"train_df: {train_df.shape} \ntest_df: {test_df.shape}")
train_df.head()

100%|██████████| 100/100 [02:33<00:00,  1.53s/it]

train_df: (600000, 200) 
test_df: (540000, 200)





Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35,f36,f37,f38,f39,...,f60_bin,f61_bin,f62_bin,f63_bin,f64_bin,f65_bin,f66_bin,f67_bin,f68_bin,f69_bin,f70_bin,f71_bin,f72_bin,f73_bin,f74_bin,f75_bin,f76_bin,f77_bin,f78_bin,f79_bin,f80_bin,f81_bin,f82_bin,f83_bin,f84_bin,f85_bin,f86_bin,f87_bin,f88_bin,f89_bin,f90_bin,f91_bin,f92_bin,f93_bin,f94_bin,f95_bin,f96_bin,f97_bin,f98_bin,f99_bin
0,0.106643,3.59437,132.804,3.18428,0.081971,1.18859,3.73238,2.26627,2.09959,0.01233,1.60719,-0.318058,0.560137,2.80688,1.35114,2.53593,0.197527,0.676494,1.98979,-3.84245,0.03738,0.230322,3.33055,0.009397,0.144738,3.05131,1.30362,0.033225,-0.018284,2.74821,-0.009294,-0.036271,-0.049871,0.019484,3.89846,11.2863,1.13802,3.36688,4.94446,-0.105772,...,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1
1,0.125021,1.67336,76.5336,3.37825,0.0994,5.09366,1.27562,-0.471318,4.54594,0.037706,0.331749,0.325091,0.06204,2.26215,4.33943,-0.224999,0.233586,3.38128,1.90299,0.067874,-0.051268,0.006135,2.60444,0.103441,0.067638,4.75362,1.85552,-0.181834,0.008359,3.16634,0.01185,0.022292,0.06932,0.117109,0.315276,24.4807,1.67227,-0.409067,4.95475,0.092358,...,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1
2,0.03633,1.49747,233.546,2.19435,0.026914,3.12694,5.05687,3.84946,1.80187,0.056995,0.328684,2.96881,0.105244,2.06949,5.30986,1.35479,-0.262018,1.37908,1.48091,0.020542,-0.008806,0.109348,1.68365,0.03818,0.123716,1.11248,3.57166,0.120601,0.082069,2.23352,0.00227,0.045182,0.014405,0.011599,-0.502849,33.7382,1.4175,1.07135,3.22296,2.12203,...,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1
3,-0.014077,0.246,779.967,1.89064,0.006948,1.53112,2.698,4.51733,4.50332,0.123494,1.00268,4.8696,0.058411,2.49785,1.23843,2.34836,0.175475,1.60889,2.02881,0.042086,0.005141,0.076506,1.65122,0.111813,0.121641,0.58912,4.23692,-0.032843,0.058168,0.712927,0.097465,0.072744,0.000324,0.063362,4.06382,25.3824,0.576572,2.02621,2.96843,1.08567,...,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1,0,0,1,1,1,1,1,1
4,-0.003259,3.71542,156.128,2.14772,0.018284,2.09859,4.15492,-0.038236,3.37145,0.034166,0.711483,0.769988,0.057555,0.957257,3.71145,5.46435,0.287104,2.61695,1.38403,0.074883,-0.010543,0.109121,2.27602,0.008023,0.045236,4.35954,5.07562,-0.009376,0.528966,4.05335,0.02,0.106828,0.051307,0.045939,3.40246,15.5615,1.63596,0.047029,4.01771,0.155748,...,1,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1


In [None]:
df_train['mean'] = df_train.mean(axis=1)
df_train['std'] = df_train.std(axis=1)
df_train['var'] = df_train.var(axis=1)
df_train['sum'] = df_train.sum(axis=1)
df_train['max'] = df_train.max(axis=1)
df_train['kurt'] = df_train.kurt(axis=1)
df_train['quantile'] = df_train.quantile(axis=1)


df_test['mean'] = df_test.mean(axis=1)
df_test['std'] = df_test.std(axis=1)
df_test['var'] = df_test.var(axis=1)
df_test['sum'] = df_test.sum(axis=1)
df_test['max'] = df_test.max(axis=1)
df_test['kurt'] = df_test.kurt(axis=1)
df_test['quantile'] = df_test.quantile(axis=1)

In [None]:
df_train = df_train[['mean','std', 'var', 'sum', 'max', 'kurt', 'quantile']]
df_test = df_test[['mean','std', 'var', 'sum', 'max', 'kurt', 'quantile']]

In [None]:
train_df = pd.concat([train_df,df_train,train['target']],axis = 1)
test_df = pd.concat([test_df,df_test],axis = 1)

In [None]:
features = test_df.columns.tolist()
print(f"Num features: {len(features)}")

Num features: 207


In [None]:
train_df[features] = train_df[features].astype('float32')
test_df[features] = test_df[features].astype('float32')
print(f"train_df: {train_df.shape} \ntest_df: {test_df.shape}")

train_df: (600000, 208) 
test_df: (540000, 207)


Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35,f36,f37,f38,f39,...,f67_bin,f68_bin,f69_bin,f70_bin,f71_bin,f72_bin,f73_bin,f74_bin,f75_bin,f76_bin,f77_bin,f78_bin,f79_bin,f80_bin,f81_bin,f82_bin,f83_bin,f84_bin,f85_bin,f86_bin,f87_bin,f88_bin,f89_bin,f90_bin,f91_bin,f92_bin,f93_bin,f94_bin,f95_bin,f96_bin,f97_bin,f98_bin,f99_bin,mean,std,var,sum,max,kurt,quantile
0,0.003229,4.838660,585.528992,2.282910,0.713180,3.907830,0.480696,1.482270,4.891810,0.056351,4.200990,3.151800,0.000349,1.851160,2.63889,0.746668,-0.004756,1.610300,4.11482,-0.077756,0.129446,0.053324,0.416789,0.445009,0.150464,5.021300,2.221390,-0.072333,-0.215874,1.56236,0.074881,0.010050,0.018582,0.067466,5.578300,3.085560,3.842470,0.011125,2.35997,0.695092,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.101993,58.157021,3374.306641,4149.764648,4149.764648,32.237778,0.430899
1,0.008602,0.505536,-100.098999,3.012670,0.027199,1.194610,5.036620,2.517440,4.553890,0.063876,0.337257,4.439690,0.013188,3.379010,3.38470,1.167400,2.246550,1.750170,2.76624,-0.058501,0.012595,0.036144,0.769057,0.017496,0.050283,0.324697,4.948640,0.124789,0.347128,1.24512,0.035822,-0.013188,0.023194,0.006444,4.983330,23.706900,8.287290,4.796230,1.79928,-0.050040,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.551832,10.509767,110.333733,176.578537,176.578537,28.490255,0.394878
2,1.461000,2.437260,-112.963997,3.541230,0.752338,4.338310,1.648080,4.699910,1.950250,0.005303,2.071680,0.546499,0.141781,1.673170,4.30649,1.702330,-0.062869,1.619230,4.19053,0.055140,-0.016590,0.017805,3.064810,0.070370,0.098316,3.507540,1.069100,0.012750,0.009981,3.46781,0.035920,-0.009804,0.065728,-0.004725,5.281020,11.528800,0.171694,4.394570,2.52084,0.079365,...,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.224444,11.536294,133.022888,167.228073,167.228073,25.425522,0.278908
3,0.140556,3.085610,179.451004,0.573945,0.057342,2.216790,1.623480,0.526174,1.542540,-0.026160,1.609440,1.723560,-0.019564,1.552130,4.83264,1.501640,0.192669,4.614890,1.47069,-0.010031,0.072805,0.048035,3.230210,-0.031548,0.028697,3.752520,4.948470,-0.174542,-0.033491,2.47823,0.068130,0.090797,0.029877,0.146718,3.169830,-12.984600,3.135210,1.765010,3.25399,0.713238,...,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,2.714848,17.870302,318.437653,610.507568,610.507568,36.194351,0.182896
4,0.128876,5.199760,107.466003,-0.497149,0.080220,0.458121,0.629839,5.240460,-0.232279,0.030006,0.481359,2.176020,0.193162,1.392090,2.51890,2.993170,4.170910,0.318375,4.84563,0.085064,0.026443,-0.004559,0.120327,-0.008630,0.004495,4.921700,1.685640,0.095628,0.189131,2.56955,0.046643,0.111462,0.002912,0.060737,4.253000,38.703400,0.170825,-0.598784,3.92796,0.262956,...,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,2.540107,11.305001,127.290840,395.146667,395.146667,41.474819,0.181556
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
539995,0.431599,1.507560,24.269800,2.928480,2.788830,5.152330,2.779980,0.816389,4.791560,0.026331,4.562970,0.233379,0.254225,2.213830,5.58360,3.800710,0.503562,4.061380,2.75466,0.112982,-0.038411,0.077714,3.010950,-0.008242,0.043315,0.080434,2.371080,0.133727,0.355986,2.01470,0.002188,0.220446,0.078856,0.084574,0.758547,-3.627310,2.367110,0.179322,1.25804,0.029282,...,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.660959,3.002872,8.945618,179.705383,179.705383,48.332188,0.457981
539996,0.069713,2.355480,-128.755005,2.721580,1.256300,4.248220,2.014550,2.207120,3.020260,0.020398,4.588860,3.219260,-0.011444,2.802410,1.71392,4.173030,-0.028895,2.029860,1.67972,0.059944,0.052139,-0.006159,2.235450,0.105783,0.091785,4.098640,0.232705,0.173007,0.180720,3.94704,0.211080,0.034419,0.077198,0.027838,1.180220,-3.170320,-0.545938,3.045400,2.23775,0.458355,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,-0.041132,13.038609,169.999344,178.883606,178.883606,24.497311,0.267225
539997,0.385075,2.528890,-63.985401,0.975396,0.043852,0.829423,2.014210,1.509500,2.027590,0.097387,3.425350,1.195470,0.090862,-0.130309,5.42106,1.561510,0.132260,0.392255,5.08088,-0.059632,0.074616,0.089685,4.798180,0.092356,0.014446,3.729750,1.649350,-0.005483,0.326276,2.88960,0.067742,0.028514,0.074359,0.070743,1.646070,29.165400,0.364036,4.478340,3.64816,0.647542,...,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.822070,7.245872,52.387405,142.662323,142.662323,34.947292,0.510507
539998,1.846240,3.415350,26.847601,-0.120134,0.027113,2.155160,2.529860,2.502250,3.453090,0.090760,1.307160,4.107690,0.034657,0.867694,1.60534,2.978230,0.145505,2.855640,1.13562,0.138793,-0.005488,0.012881,0.576931,0.015822,0.120977,1.147820,0.252155,0.202905,0.125948,3.66790,0.092273,-0.005253,0.029498,0.110625,2.201660,13.295000,0.181733,1.299380,1.48190,0.229645,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.476450,3.194854,10.134982,162.451324,162.451324,47.713188,0.192319


# Model10-DNN

In [None]:
FOLD = 5
VERBOSE = 0
SEEDS = [2021, 2025]
BATCH_SIZE = 512

counter = 0
oof_score = 0
y_pred_final_dnn = np.zeros((test_df.shape[0], 1))
y_pred_meta_dnn = np.zeros((train_df.shape[0], 1))


for sidx, seed in enumerate(SEEDS):
    seed_score = 0
    
    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(train_df[features], train_df['target'])):
        counter += 1

        train_x, train_y = train_df[features].iloc[train], train_df['target'].iloc[train]
        val_x, val_y = train_df[features].iloc[val], train_df['target'].iloc[val]

        model7 = dnn_model()
        model7.compile(optimizer=Adam(learning_rate=1e-2), 
                      loss="binary_crossentropy", 
                      metrics=['AUC'])

        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.25, 
                               patience=4, verbose=VERBOSE)
        
        chk_point = ModelCheckpoint(f'./Keras_DNN_Model_{counter}C.h5', 
                                    monitor='val_loss', verbose=VERBOSE, 
                                    save_best_only=True, mode='min')

        es = EarlyStopping(monitor="val_loss", patience=15, 
                           verbose=VERBOSE, mode="min", 
                           restore_best_weights=True)
        
        model7.fit(train_x, train_y, 
                  validation_data=(val_x, val_y), 
                  epochs=300,
                  verbose=VERBOSE,
                  batch_size=BATCH_SIZE, 
                  callbacks=[lr, chk_point, es])
        
        model7 = load_model(f'./Keras_DNN_Model_{counter}C.h5')
        
        y_pred = model7.predict(val_x, batch_size=BATCH_SIZE)
        y_pred_meta_dnn[val] += y_pred
        y_pred_final_dnn += model7.predict(test_df, batch_size=BATCH_SIZE)
        
        score = roc_auc_score(val_y, y_pred)
        oof_score += score
        seed_score += score
        print("\nSeed-{} | Fold-{} | OOF Score: {}\n".format(seed, idx, score))
        
        del model7, y_pred
        del train_x, train_y
        del val_x, val_y
        gc.collect()
    
    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


y_pred_meta_dnn7 = y_pred_meta_dnn / float(len(SEEDS))
y_pred_final_dnn7 = y_pred_final_dnn / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-0 | OOF Score: 0.744060517545417



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-1 | OOF Score: 0.7488193638908178



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-2 | OOF Score: 0.7447349246522129



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-3 | OOF Score: 0.7486394165174783



  layer_config = serialize_layer_fn(layer)



Seed-2021 | Fold-4 | OOF Score: 0.7520500681021389


Seed: 2021 | Aggregate OOF Score: 0.747660858141613




  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-0 | OOF Score: 0.7461143771788741



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-1 | OOF Score: 0.7495787028451294



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-2 | OOF Score: 0.7494473348477763



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-3 | OOF Score: 0.7485224330972622



  layer_config = serialize_layer_fn(layer)



Seed-2025 | Fold-4 | OOF Score: 0.7518934219473795


Seed: 2025 | Aggregate OOF Score: 0.7491112539832844


Aggregate OOF Score: 0.7483860560624487


In [None]:
y_pred_meta = np.mean(y_pred_meta_dnn7, axis=1)
y_pred = (y_pred_meta>0.5).astype(int)
print(classification_report(train_df['target'], y_pred))

              precision    recall  f1-score   support

           0       0.73      0.73      0.73    296394
           1       0.74      0.74      0.74    303606

    accuracy                           0.73    600000
   macro avg       0.73      0.73      0.73    600000
weighted avg       0.73      0.73      0.73    600000



In [None]:
cnf_matrix = confusion_matrix(train_df['target'], y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
plt.figure(figsize=(12, 5))

<Figure size 864x360 with 0 Axes>

<Figure size 864x360 with 0 Axes>

In [None]:
submit_df7 = pd.read_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/sample_submission.csv")
submit_df7['target'] = y_pred_final_dnn7.ravel()
submit_df7.to_csv("/content/drive/MyDrive/AI/dataset/tabular-playground-series-nov-2021/Derived_data.csv", index=False)
submit_df7.head()

Unnamed: 0,id,target
0,600000,0.723546
1,600001,0.732156
2,600002,0.739589
3,600003,0.479302
4,600004,0.721516
