In [4]:
import random
import tensorflow as tf
import tensorflow.keras.backend as K

import numpy as np
import pandas as pd

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Dropout, BatchNormalization, Activation, Embedding, Concatenate, Reshape, Lambda
from tensorflow.keras.utils import get_custom_objects
from tensorflow.keras.optimizers import Adam, Nadam
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.utils import to_categorical

from sklearn.metrics import roc_auc_score, confusion_matrix
from sklearn.preprocessing import OrdinalEncoder

import dice_ml
from dice_ml.utils import helpers # helper functions

from alibi.explainers import CounterFactualProto
from alibi.utils.mapping import ohe_to_ord, ord_to_ohe

import seaborn as sns
import matplotlib.pyplot as plt
import pickle

import time

from xmoai.setup.configure import generate_counterfactuals_regression
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [5]:
# One shared seed value for the Python, NumPy and TensorFlow random generators.
seed = 2020

# Reset Keras' global state before seeding (K is tensorflow.keras.backend).
K.clear_session()

random.seed(seed)         # Python
np.random.seed(seed)      # NumPy
tf.random.set_seed(seed)  # Tensorflow

In [6]:
# Remove pandas' display limits so frames are rendered with every row and
# every column instead of being truncated.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

<h5>Loading the dataset, models and metadata</h5>

In [7]:
# Name of the target column.
target_col = 'isFraud'

# NOTE(review): unpickling can execute arbitrary code, so only load
# 'training_data.pkl' when it comes from a trusted source.
# The `with` block closes the file handle as soon as loading finishes; the
# previous bare open() call left it open until garbage collection.
# xgb, lgb and rf are used further down through .predict(); presumably they are
# the fitted tree-based models -- confirm against the notebook that wrote the file.
with open('training_data.pkl', 'rb') as training_file:
    [xgb, lgb, rf, X_val_encoded, y_val, cat_columns, cat_object_columns, cat_vars_ord] = pickle.load(training_file)

# Keras model saved under the 'model_reg' path.
model = tf.keras.models.load_model('model_reg')

<h5>Choosing n individuals with an absolute error less than 2 on every model</h5>

In [8]:
# Absolute error of every model on each validation sample.
y_true = y_val.values

# The Keras prediction is flattened to 1-D before the ground truth is subtracted.
results_tf = np.abs(model.predict(X_val_encoded).ravel() - y_true)

# The remaining models' predictions are subtracted as returned.
results_xgb = np.abs(xgb.predict(X_val_encoded) - y_true)
results_lgb = np.abs(lgb.predict(X_val_encoded) - y_true)
results_rf = np.abs(rf.predict(X_val_encoded) - y_true)

In [9]:
# Positions of the samples whose absolute error is strictly below the threshold,
# computed per model. np.nonzero(cond) is what single-argument np.where(cond)
# is shorthand for, so each idx_* is still a 1-tuple holding an index array.
max_abs_error = 2

idx_tf = np.nonzero(results_tf < max_abs_error)
idx_xgb = np.nonzero(results_xgb < max_abs_error)
idx_lgb = np.nonzero(results_lgb < max_abs_error)
idx_rf = np.nonzero(results_rf < max_abs_error)

In [10]:
# Number of individuals to keep.
n = 10

# Intersect the per-model index sets one at a time (same pairing order as a
# nested call), then keep the first n positions of the sorted result.
shared_idx = idx_tf
for other_idx in (idx_xgb, idx_lgb, idx_rf):
    shared_idx = np.intersect1d(shared_idx, other_idx)
correct_indexes = shared_idx[:n]

In [11]:
# Positional (.iloc) selection of the chosen rows from the validation
# features and from the validation targets.
X_test, y_test = X_val_encoded.iloc[correct_indexes], y_val.iloc[correct_indexes]

In [12]:
# Rebind correct_indexes from the positional indices used with .iloc in the
# previous cell to the index labels of the selected rows; this rebound value is
# what gets pickled afterwards.
# NOTE(review): after this cell runs, re-running the .iloc selection cell would
# pass labels where positions are expected -- re-run from the intersection cell.
correct_indexes = X_test.index

In [13]:
# Persist the selected samples, their targets and their index labels.
# The `with` block closes the file when the dump finishes, so the data is
# flushed to disk deterministically; the previous bare open() call never
# closed the write handle explicitly.
with open('validation_dataset.pkl', 'wb') as validation_file:
    pickle.dump([X_test, y_test, correct_indexes], validation_file)