# MLP KerasTuner

# Load libraries

In [1]:
import pandas as pd
#import cudf
import numpy as np
import tensorflow as tf
import keras_tuner as kt

from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

from colorama import Fore, Back, Style
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator

import os
import gc
import random
import math
import pickle
import datetime

# Custom functions

In [2]:
def pickle_load(path):
    """Load and return the object stored in the pickle file at ``path``.

    Args:
        path: Location of the pickle file to read.

    Returns:
        The unpickled object.

    Note:
        Unpickling can execute arbitrary code; only load files that come
        from a trusted source.
    """
    import pickle
    # The context manager closes the handle even when unpickling raises.
    with open(path, 'rb') as file:
        return pickle.load(file)

In [3]:
def pickle_dump(path, saveobj):
    """Pickle ``saveobj`` to the file at ``path`` and print a confirmation.

    Args:
        path: Destination file; it is created or overwritten.
        saveobj: Any picklable object.
    """
    import pickle
    # The context manager flushes and closes the file even if pickling fails.
    with open(path, "wb") as filehandler:
        pickle.dump(saveobj, filehandler)
    print("File pickled")

In [4]:
def reduce_mem_usage(df, verbose=True):
    """Downcast the numeric columns of ``df`` to the smallest fitting dtype.

    Columns are replaced in place on ``df``. Integer columns are tried
    against int8/int16/int32/int64 and float columns against
    float16/float32, falling back to float64. Columns whose dtype is not
    one of the listed numeric dtypes are left untouched.

    Args:
        df: DataFrame to shrink (modified in place).
        verbose: When true, print the memory usage after conversion and
            the percentage saved.

    Returns:
        The same ``df`` object, for call chaining.

    Note:
        The dtype is chosen from the value *range* only. float16 and
        float32 hold fewer significant digits than float64, so float
        columns can lose precision.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type)[:3] == 'int':
            # Bounds are inclusive: a value equal to the dtype limit still fits.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Out of float32 range, or min/max is NaN (comparisons are False).
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
        # Guard the percentage against a zero-size starting footprint.
        if start_mem > 0:
            print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    return df

# Read in train data

In [5]:
# Collapse the training data to one row per customer by taking `.last()` of
# each customer_ID group, then drop the S_2 column.
train = (
    pd.read_parquet('../input/amex-data-integer-dtypes-parquet-format/train.parquet')
    .groupby(['customer_ID'])
    .last()
    .reset_index()
    .drop(columns=['S_2'])
)

# Attach the target label to every customer (left join keeps all train rows).
train = train.merge(
    pd.read_csv("../input/amex-default-prediction/train_labels.csv"),
    on="customer_ID",
    how="left",
)

# Reclaim the memory held by the intermediate frames.
_ = gc.collect()

train.shape

In [6]:
# Load the prepared test set.
test = pd.read_parquet('../input/private-amex/test_prep.parquet')
# Sentinel target: marks test rows so they can be separated from train rows
# again after both frames have been combined.
test['target']=-99

In [7]:
# Stack train and test into one frame with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0.
All = pd.concat([train, test], ignore_index=True)

In [8]:
# Treat every column with at most 10 distinct values as categorical.
# nunique() scans the whole frame, so evaluate it once and reuse the result.
nunique_per_col = All.nunique()
CATS = nunique_per_col[nunique_per_col <= 10].index.tolist()
# The target is low-cardinality too, but it must not be encoded as a feature.
CATS = [col for col in CATS if col not in ['target']]
CATS

In [9]:
# Number of columns that will be one-hot encoded.
len(CATS)

In [10]:
# Downcast numeric columns of the combined frame to smaller dtypes.
All=reduce_mem_usage(All)

In [11]:
%%time

#CATS = train.nunique()[train.nunique()<=10].index.tolist()
#CATS = [col for col in CATS if col not in ['target']]

# Save categorical columns
#path = "./cat_cols.pkl"
#pickle_dump(path, CATS)

# Apply one-hot encoder to each column with categorical data.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` (the old keyword was
# removed in 1.4), so try the new name first and fall back on older installs.
try:
    OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
except TypeError:
    OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)

# Build the encoded frame directly on All's index so the concat below lines
# up row-for-row.
OH_cols_All = pd.DataFrame(
    OH_encoder.fit_transform(All[CATS]),
    columns=OH_encoder.get_feature_names_out(),
    index=All.index,
)

# Remove categorical columns (will replace with one-hot encoding)
All = All.drop(CATS, axis=1)

# Add one-hot encoded columns to numerical features
All = pd.concat([All, OH_cols_All], axis=1)

# Save encoder
path = "./ohe_encoder.pkl"
pickle_dump(path, OH_encoder)

del OH_cols_All
_ = gc.collect()

All.shape

In [12]:
# Split the combined frame back apart using the -99 sentinel target.
# Each part gets its own 0..n-1 index: the fold loop addresses `train` rows
# with positional indices, and the feature export concatenates `test` columns
# with freshly built frames along axis=1, which aligns on the index.
test = All[All["target"] == -99].reset_index(drop=True)
train = All[All["target"] != -99].reset_index(drop=True)

In [13]:
# The sentinel target has done its job; remove it from the test frame.
test=test.drop(['target'], axis=1)

In [14]:
# Feature columns: everything in train except the label and the customer key.
modelCols = train.columns.drop(['target', 'customer_ID']).tolist()

# Features followed by the label and the key.
selCols = [*modelCols, 'target', 'customer_ID']

ONE_FOLD_ONLY = True

# Persist the feature list next to the other saved artefacts.
path = "./modelCols.pkl"
pickle_dump(path, modelCols)

len(modelCols)

# Build MLP

In [15]:
def make_MLP(hp):
    """Build and compile a binary-classification MLP from KerasTuner choices.

    The network takes ``len(modelCols)`` inputs, applies batch normalisation,
    then ``num_dense`` hidden blocks (optional dropout, an L2-regularised
    Dense layer, optional batch normalisation), an optional final dropout
    and a single sigmoid output unit. It is compiled with Adam and binary
    cross-entropy, and its summary is printed on every call.

    Args:
        hp: KerasTuner hyperparameter container; every tunable value below
            is registered on it.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    layers = tf.keras.layers

    inputs = tf.keras.Input(shape=(len(modelCols), ))

    # Hyperparameters shared by all hidden blocks.
    activation = hp.Choice('activation', ['relu','elu','selu','tanh','swish'])
    num_dense = hp.Int('num_dense', min_value=0, max_value=3, step=1)
    reg = hp.Float('reg', min_value=1e-6, max_value=1e-2)

    x = layers.BatchNormalization()(inputs)

    for i in range(num_dense):
        units = hp.Int(f'units_{i}', min_value=32, max_value=1024, step=32)
        dp = hp.Float(f'dp_{i}', min_value=0., max_value=0.5)

        # A dropout layer is only inserted when the sampled rate is non-zero.
        if dp != 0:
            x = layers.Dropout(dp)(x)
        x = layers.Dense(
            units,
            kernel_regularizer=tf.keras.regularizers.l2(reg),
            activation=activation,
        )(x)

        if hp.Boolean(f'batchnorm_{i}'):
            x = layers.BatchNormalization()(x)

    final_dp = hp.Float('final_dp', min_value=0., max_value=0.5)
    if final_dp != 0:
        x = layers.Dropout(final_dp)(x)

    outputs = layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.Model(inputs, outputs)

    learning_rate = hp.Float('learning_rate', min_value=1e-5, max_value=1e-2)
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    )
    model.summary()
    return model

# KerasTuner Search

In [16]:
%%time

SEED = 42
NUM_TRIALS = 5
EPOCHS = 20
BATCH_SIZE = 512

FOLDS = 5
ONE_FOLD_ONLY = True

# Seed every RNG in play so the split and the search are repeatable.
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

# Column positions, so each fold can be sliced purely by position below.
feature_pos = train.columns.get_indexer(modelCols)
target_pos = train.columns.get_loc('target')

# NOTE: this is a plain (non-stratified) KFold despite the `skf` name; the
# target passed to split() does not influence the folds.
skf = KFold(n_splits=FOLDS, shuffle=True, random_state=SEED)
for fold,(train_idx, valid_idx) in enumerate(skf.split(train, train.target )):

    print('#'*25)
    print('### Fold',fold+1)
    print('#'*25)

    # Train and Valid datasets.
    # split() yields positional indices, so use iloc; label-based .loc only
    # gives the same rows when the index happens to be exactly 0..n-1.
    X_train = train.iloc[train_idx, feature_pos]
    y_train = train.iloc[train_idx, target_pos]

    X_valid = train.iloc[valid_idx, feature_pos]
    y_valid = train.iloc[valid_idx, target_pos]

    # Preprocess data
    # Fill nulls with the training-fold medians (computed once, reused for
    # the validation fold so no validation statistics leak in).
    train_medians = X_train.median()
    X_train = X_train.fillna(train_medians)
    X_valid = X_valid.fillna(train_medians)

    # Scaling: fit on the training fold only, then apply to validation.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_valid = scaler.transform(X_valid)
    pickle_dump("./scaler.pkl", scaler)

    validation_data = (X_valid, y_valid)

    # KerasTuner
    # `seed` makes the tuner's own sampling repeatable, like the RNGs above.
    # NOTE(review): no directory/project_name/overwrite is given, so a re-run
    # may pick up trials saved by an earlier run - confirm this is intended.
    tuner = kt.tuners.BayesianOptimization(
            make_MLP,
            kt.Objective("val_loss", direction="min"),
            max_trials=NUM_TRIALS,
            seed=SEED
            )

    es = tf.keras.callbacks.EarlyStopping(monitor="val_loss",
                       patience=5,
                       verbose=1,
                       mode="min",
                       restore_best_weights=True)

    callbacks = [es, tf.keras.callbacks.TerminateOnNaN()]

    # Kick off search
    tuner.search(X_train, y_train, validation_data=validation_data,
                 epochs=EPOCHS,
                 batch_size=BATCH_SIZE,
                 callbacks=callbacks)

    # Free the fold's arrays before the next fold (or the next cell).
    del X_train, X_valid, y_train, y_valid
    _ = gc.collect()

    if ONE_FOLD_ONLY: break

# KerasTuner results

In [17]:
# Ask the tuner for its single best model; the result is only displayed here,
# not stored in a variable.
tuner.get_best_models(num_models=1)

In [18]:
# Hyperparameters of the best trial found by the search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Network-wide settings.
print(f'''
Activation: {best_hps.get('activation')}
Num_dense: {best_hps.get('num_dense')}
Regularization: {best_hps.get('reg')}
learning_rate: {best_hps.get('learning_rate')}
        ''')

# Per-layer settings, one report per hidden block that make_MLP creates.
for i in range(best_hps.get('num_dense')):
    print(f"Layer {i+1}:")
    print()
    print(f''' 
            Dropout_{i}: {best_hps.get(f'dp_{i}')}
            Units_{i}: {best_hps.get(f'units_{i}')}
            Batchnorm_{i}: {best_hps.get(f'batchnorm_{i}')}
            ''')

# Dropout applied right before the output layer.
print(f"Final Dropout: {best_hps.get('final_dp')}")

In [19]:
# Save keras tuner object
# NOTE(review): this pickles the whole tuner instance; whether it round-trips
# through pickle depends on the keras_tuner/TensorFlow versions - verify by
# loading the file back.
path = "./kerastuner.pkl"
pickle_dump(path, tuner)

# Train best model

In [20]:
# Build a fresh, untrained model from the best hyperparameters
model = tuner.hypermodel.build(best_hps)

# Preprocessing - same order as in the tuning loop: impute, then scale.
# Medians are taken over the feature columns only (the customer_ID string
# column has no median) and on the raw, unscaled values they will replace.
fillNulls = train[modelCols].median()

# Save fillNulls
path = "./fillNulls.pkl"
pickle_dump(path, fillNulls)

train[modelCols] = train[modelCols].fillna(fillNulls)

# Refit the scaler on the full training set and persist it, so the saved
# scaler is the one that matches the final model's inputs.
scaler = StandardScaler()
train[modelCols] = scaler.fit_transform(train[modelCols])
pickle_dump("./scaler.pkl", scaler)

# Define callbacks
lr = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.7,
                           patience=4, verbose=1)

es = tf.keras.callbacks.EarlyStopping(monitor="val_loss",
                   patience=8,
                   verbose=1,
                   mode="min",
                   restore_best_weights=True)

callbacks = [lr, es, tf.keras.callbacks.TerminateOnNaN()]

# Train model, holding out 20% of the rows for validation
history = model.fit(train[modelCols], train.target,
                    batch_size=256, epochs=50, callbacks=callbacks,
                    validation_split=0.2)

# Epoch (1-based) with the lowest validation loss
val_loss_per_epoch = history.history['val_loss']
best_epoch = val_loss_per_epoch.index(min(val_loss_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

In [21]:
# Start again from untrained weights with the best hyperparameters
model = tuner.hypermodel.build(best_hps)

# Retrain the model till best epoch, this time on every training row.
# There is no validation data in this fit, so callbacks that monitor val_loss
# (learning-rate reduction, early stopping) have nothing to act on; only the
# NaN guard is kept.
model.fit(train.drop(columns=['target','customer_ID']), train.target,
          batch_size=256, epochs=best_epoch,
          callbacks=[tf.keras.callbacks.TerminateOnNaN()])

In [22]:
# Persist the retrained model.
tf.keras.models.save_model(model, "./kt_model.hdf5")

# Sub-model that maps the network inputs to the output of the layer at
# position 2 in model.layers.
# NOTE(review): which layer sits at index 2 depends on the tuned architecture
# built by make_MLP (dropout/batch-norm are optional, num_dense may be 0) -
# confirm it is the intended hidden layer for the chosen hyperparameters.
extractor = tf.keras.Model(inputs=model.inputs,
                                outputs=model.layers[2].output)

# Extracted features for every training row.
features_train = extractor.predict(train.drop(columns=['target','customer_ID']))
features_train.shape

In [25]:
# Pair every customer ID with its extracted features.
# The feature frame is built on train's index so the axis=1 concat (which
# aligns on the index) matches rows one-to-one.
NN_ftr_tr = train['customer_ID']
temp = pd.DataFrame(features_train, index=train.index)
NN_ftr_tr = pd.concat([NN_ftr_tr, temp], axis=1)

# One name per extracted feature, derived from the actual feature width
# instead of a hard-coded count.
col_name = ['customer_ID'] + [f"NN_{i}" for i in range(features_train.shape[1])]

NN_ftr_tr.columns = col_name
NN_ftr_tr.to_csv('NN_ftr_tr.csv')

In [26]:
# Drop the large training-side objects and collect immediately, as done after
# the other `del` statements in this notebook.
del train, All, features_train, NN_ftr_tr
_ = gc.collect()

In [29]:
# Give the test features the same preprocessing the training features had
# before they reached the network: impute with the stored medians, then apply
# the fitted scaler. Selecting modelCols also fixes the column order.
X_test = scaler.transform(test[modelCols].fillna(fillNulls))
features_test = extractor.predict(X_test)

# Pair every customer ID with its extracted features.
# The feature frame is built on test's index: the axis=1 concat aligns on the
# index, and test's row labels are not guaranteed to start at 0.
NN_ftr_ts = test['customer_ID']
temp = pd.DataFrame(features_test, index=test.index)
NN_ftr_ts = pd.concat([NN_ftr_ts, temp], axis=1)

# Same naming scheme as the training export, sized to the actual width.
NN_ftr_ts.columns = ['customer_ID'] + [f"NN_{i}" for i in range(features_test.shape[1])]
NN_ftr_ts.to_csv('NN_ftr_ts.csv')