In [1]:
# DiCE
# COMPAS for bail decisions
# Adult-Income for income prediction
# German-Credit for assessing credit risk
# Dataset from Lending Club for loan decisions: https://www.lendingclub.com/info/download-data.action

#Alibi

#AIX360

## Importing libraries

In [2]:
import time
import pickle
import dice_ml
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import tensorflow.compat.v1 as tf

from sklearn.impute import *
from sklearn.metrics import *
from sklearn.ensemble import *
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
from sklearn.compose import make_column_selector as selector
from sklearn.preprocessing import OneHotEncoder, MaxAbsScaler, LabelEncoder

from imblearn.over_sampling import SMOTENC

from xmoai.problems.objectives import *
from xmoai.problems.restrictions import *
from xmoai.setup.configure import generate_counterfactuals_regression

from alibi.explainers import *

pd.set_option('display.max_columns', None)

In [3]:
# `tf` is the `tensorflow.compat.v1` alias from the import cell.
# Switch TensorFlow out of eager execution before anything else touches it.
tf.compat.v1.disable_eager_execution()
# Reset Keras backend state so re-running this cell starts from a clean session.
tf.keras.backend.clear_session()
# Last expression of the cell: lists the devices known to the current session
# (the recorded output shows one CPU and one GPU).
tf.compat.v1.keras.backend.get_session().list_devices()




[_DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 268435456, -3721264022667589566),
 _DeviceAttributes(/job:localhost/replica:0/task:0/device:GPU:0, GPU, 9396617216, 1871858829580393319)]

# California Dataset

In [4]:
# Load the California housing data as pandas objects. The returned Bunch is
# kept under its own name so that `df_california` is only ever a DataFrame
# (previously the same name was bound first to the Bunch and then to the frame).
california_bunch = fetch_california_housing(as_frame=True)

# Join features and target column-wise; the target ('MedHouseVal') ends up as
# the last column, which the later index bookkeeping relies on.
df_california = pd.concat([california_bunch['data'], california_bunch['target']], axis=1)
df_california

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422
...,...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09,0.781
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21,0.771
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22,0.923
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32,0.847


## Defining required columns

In [5]:
# Column roles used throughout the notebook.
target = 'MedHouseVal'
integer_columns = ['HouseAge', 'Population']
cat_columns = []
num_columns = df_california.drop('MedHouseVal', axis=1).columns.tolist()

# Positional indexes (within df_california's column order) of the numeric and
# categorical features. A column listed as numeric is never counted as
# categorical; columns in neither list (e.g. the target) are skipped.
num_indexes = [
    position
    for position, name in enumerate(df_california.columns)
    if name in num_columns
]
cat_indexes = [
    position
    for position, name in enumerate(df_california.columns)
    if name not in num_columns and name in cat_columns
]

### Converting string-encoded categories to integers

In [6]:
# One fitted LabelEncoder per categorical column, kept so that generated
# counterfactuals can be mapped back to the original category values later.
label_encoders = {}

for cat_name in cat_columns:
    # Fit on the column's current contents, then overwrite the column with
    # its integer codes.
    label_encoders[cat_name] = LabelEncoder().fit(df_california[cat_name])
    df_california[cat_name] = label_encoders[cat_name].transform(df_california[cat_name])

## Training

In [7]:
# Split features and target with a fixed seed so the split is repeatable.
# NOTE(review): test_size=0.7 sends 70% of the rows to the test split and leaves
# only 30% for training — confirm this is intended rather than test_size=0.3.
X_train, X_test, y_train, y_test = train_test_split(df_california.drop(target, axis=1), df_california[target], test_size=0.7, random_state=0)

In [8]:
# When False, categorical columns (if any) stay label-encoded and are routed
# through the numeric transformer instead of being one-hot encoded.
one_hot_encode = False

# Numeric preprocessing: impute missing values with KNNImputer, then scale
# with MaxAbsScaler.
numeric_transformer = Pipeline(
    steps=[("imputer", KNNImputer()), ("scaler", MaxAbsScaler())]
)

# Column-wise preprocessing; columns are addressed by position.
if one_hot_encode:
    # NOTE(review): newer scikit-learn releases rename `sparse` to
    # `sparse_output`; left unchanged to match the environment this notebook
    # was recorded with — confirm before upgrading.
    categorical_transformer = OneHotEncoder(handle_unknown='ignore', sparse=False)
    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numeric_transformer, num_indexes),
            ("cat", categorical_transformer, cat_indexes),
        ]
    )
else:
    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numeric_transformer, num_indexes + cat_indexes),
        ]
    )

# Full model: preprocessing followed by a random forest regressor.
# random_state is fixed (matching the seed used for the train/test split) so
# that the fitted forest, and everything derived from its predictions, is
# reproducible across kernel restarts.
model = Pipeline(
    steps=[("preprocessor", preprocessor),
           ("regressor", RandomForestRegressor(n_jobs=-1, random_state=0))]
).fit(X_train, y_train)

## Preparing counterfactual generation with DiCE

In [9]:
# For every numeric column: the largest number of characters found after the
# decimal point in the column's string representation, stored negated.
# NOTE(review): this dict is currently not passed to DiCE (the keyword below is
# commented out) — confirm whether it is still needed.
continuous_precision = {
    name: -df_california[name].astype(str).str.split('.').apply(lambda parts: len(parts[1])).max()
    for name in num_columns
}

# DiCE data interface built from the training split only (features + target).
d = dice_ml.Data(
    dataframe=pd.concat([X_train, y_train], axis=1),
    continuous_features=num_columns,
    #continuous_features_precision=continuous_precision,
    outcome_name=target,
)

# Wrap the fitted scikit-learn pipeline as a regressor and build the explainer.
m = dice_ml.Model(model=model, backend='sklearn', model_type='regressor')
exp = dice_ml.Dice(d, m)

## Preparing counterfactual generation with XMOAI

In [10]:
# Describe the categorical features in the two layouts passed to XMOAI later:
#   * one-hot: a list of index arrays, one array per original categorical column
#   * label-encoded: {column position: sorted array of the values seen in X_train}
columns = X_train.columns
categorical_columns_one_hot_encoded = []
categorical_columns_label_encoded = {}

# isinstance (rather than an exact type comparison) also accepts Pipeline subclasses.
if one_hot_encode and isinstance(model, Pipeline):
    for cat_col in cat_columns:
        # Columns named '<cat_col>_<something>' are treated as the one-hot
        # expansion of cat_col.
        # NOTE(review): X_train is not one-hot encoded in this notebook (encoding
        # happens inside the pipeline), so this may never match — confirm.
        columns_in_cat = [col for col in columns if col.startswith(f'{cat_col}_')]
        columns_in_cat = np.argwhere(np.isin(columns, columns_in_cat)).flatten()

        if len(columns_in_cat) > 0:
            categorical_columns_one_hot_encoded.append(columns_in_cat)
else:
    for i, col_name in enumerate(columns):
        if col_name in cat_columns:
            categorical_columns_label_encoded[i] = np.sort(X_train[col_name].unique())

display(categorical_columns_one_hot_encoded)
display(categorical_columns_label_encoded)

[]

{}

In [11]:
# Constraints and search bounds passed to the XMOAI counterfactual generator.
immutable_column_indexes = [] # empty: no column position is marked as immutable

upper_bounds = np.array(X_train.max(axis=0)*1.0) # per-column maximum observed in X_train (the *1.0 yields floats)
lower_bounds = np.array(X_train.min(axis=0)*1.0) # per-column minimum observed in X_train
# These bounds may be adjusted to the needs of the specific individual being explained.

## Generating counterfactuals

In [12]:
# How many test instances to take from each side of the training median.
number_of_elements = 75

# Summary statistics of the training target; the 50% quantile splits the
# target range into a "low" and a "high" interval.
values = y_train.describe()
permitted_low = [values.loc['min'], values.loc['50%']]
permitted_high = [values.loc['50%'], values.loc['max']]

# Positions (within X_test / y_test) of the first `number_of_elements` test
# instances whose true target lies strictly below / strictly above the
# training median. Instances exactly at the median land in neither group.
idx_low = np.flatnonzero(y_test.values < y_train.median())[:number_of_elements]
idx_high = np.flatnonzero(y_test.values > y_train.median())[:number_of_elements]

In [13]:
# Generate DiCE counterfactuals in two batches. This is slow: the recorded run
# shows roughly 29 minutes per batch of 75 instances.
# Low-target instances are asked for counterfactuals in the [median, max] range,
# high-target instances for counterfactuals in the [min, median] range.
dice_all_exp_low = exp.generate_counterfactuals(X_test.iloc[idx_low], total_CFs=100, desired_range=permitted_high)
dice_all_exp_high = exp.generate_counterfactuals(X_test.iloc[idx_high], total_CFs=100, desired_range=permitted_low)

100%|██████████████████████████████████████████████████████████████████████████████████| 75/75 [29:29<00:00, 23.60s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 75/75 [29:38<00:00, 23.71s/it]


In [14]:
# Root-mean-squared error of the fitted pipeline on the test split.
# The square root is taken explicitly instead of passing `squared=False`,
# a keyword that newer scikit-learn releases deprecate and then remove; for
# this single-output target the value is the same.
np.sqrt(mean_squared_error(y_test, model.predict(X_test)))

0.5587439521339255

In [15]:
# Accumulates one DataFrame of counterfactuals (DiCE + XMOAI) per evaluated
# instance, keyed by that instance's index label in X_test.
results = {}

In [None]:
def calculate_metrics(df, X_current, y_desired, method, categorical_columns_label_encoded,
                      categorical_columns_one_hot_encoded):
    """Score a set of counterfactuals against the instance they were generated for.

    Reads the notebook-level globals ``model`` and ``ranges``; both must be
    defined before this function is called.

    Parameters
    ----------
    df : counterfactual candidates (an object exposing ``.values``; a
        DataFrame in this notebook).
    X_current : the original instance, in the same layout as ``df``.
    y_desired : target value the counterfactuals should reach.
    method : forwarded to ``get_difference_target_regression`` (this notebook
        passes ``'predict'``).
    categorical_columns_label_encoded, categorical_columns_one_hot_encoded :
        categorical-column descriptions forwarded to the XMOAI objectives.

    Returns
    -------
    tuple ``(f1, f2, f3, prediction)`` holding, in order, the first result of
    ``get_difference_target_regression``, the result of
    ``get_difference_attributes``, the result of ``get_modified_attributes``,
    and the second result of ``get_difference_target_regression``.
    NOTE(review): presumably target distance, attribute distance, number of
    modified attributes and the model prediction — confirm against XMOAI.
    """
    target_objective, predicted = get_difference_target_regression(
        model, df, y_desired, method
    )
    attribute_objective = get_difference_attributes(
        df.values,
        X_current.values,
        ranges,
        categorical_columns_label_encoded,
        categorical_columns_one_hot_encoded,
    )
    modified_objective = get_modified_attributes(
        df, X_current, categorical_columns_one_hot_encoded
    )

    return target_objective, attribute_objective, modified_objective, predicted

# Per-feature spread (max - min) of the training data; read as a global by
# `calculate_metrics`.
ranges = (X_train.max() - X_train.min()).values

# Walk over the selected test instances: first all "low" ones (pushed towards
# the upper half of the target range), then all "high" ones (pushed towards
# the lower half). For each instance, collect DiCE and XMOAI counterfactuals
# with their F1/F2/F3 scores in one DataFrame.
for index_to_refer in range(len(idx_low) + len(idx_high)):
    print(f'Processing {index_to_refer}')

    if index_to_refer < len(idx_low):
        X_current = X_test.iloc[[idx_low[index_to_refer]]]
        y_desired = values['75%']
        y_acceptable_range = permitted_high
    else:
        X_current = X_test.iloc[[idx_high[index_to_refer - len(idx_low)]]]
        y_desired = values['25%']
        y_acceptable_range = permitted_low

    # DiCE CFs
    print('Processing DiCE')
    try:
        if index_to_refer < len(idx_low):
            df_dice = dice_all_exp_low.cf_examples_list[index_to_refer].final_cfs_df.copy().drop(target, axis=1)
        else:
            df_dice = dice_all_exp_high.cf_examples_list[index_to_refer - len(idx_low)].final_cfs_df.copy().drop(target, axis=1)

        f1, f2, f3, prediction = calculate_metrics(df_dice, X_current, y_desired, 'predict',
                                                   categorical_columns_label_encoded,
                                                   categorical_columns_one_hot_encoded)

        # Map label-encoded categories back to their original values.
        for col in label_encoders.keys():
            df_dice[col] = label_encoders[col].inverse_transform(df_dice[col].astype(int))

        # The score frame has a fresh 0..n-1 index; reset the counterfactual
        # frame's index as well so the column-wise concat pairs rows by
        # position instead of by whatever index labels DiCE returned.
        df_dice = pd.concat([df_dice.reset_index(drop=True),
                             pd.DataFrame(np.vstack([f1, f2, f3]).T, columns=['F1', 'F2', 'F3'])], axis=1)
        df_dice['Algorithm'] = 'DiCE'
    except Exception as exc:
        # Best-effort: keep going with an empty frame, but report what failed.
        # `Exception` (not a bare except) lets KeyboardInterrupt stop this
        # long-running loop.
        df_dice = pd.DataFrame()
        display(f'Error in DiCE ({type(exc).__name__}: {exc}). Resuming...')

    # XMOAI CFs
    print('Processing XMOAI')
    try:
        front, X_generated, algorithms = generate_counterfactuals_regression(model, X_train,
                                  X_current.iloc[0], y_desired, immutable_column_indexes,
                                  y_acceptable_range, upper_bounds, lower_bounds,
                                  categorical_columns_label_encoded, categorical_columns_one_hot_encoded,
                                  num_indexes, n_gen=100, pop_size=100, max_changed_vars=5,
                                  verbose=False, select_best=True, seed=0)

        df_xmoai = pd.DataFrame(X_generated.copy(), columns=X_test.columns)
        for col in label_encoders.keys():
            df_xmoai[col] = label_encoders[col].inverse_transform(df_xmoai[col].astype(int))

        # `front` supplies the three objective values for each generated row.
        df_xmoai = pd.concat([df_xmoai, pd.DataFrame(front, columns=['F1', 'F2', 'F3'])], axis=1)
        df_xmoai['Algorithm'] = 'Proposal'
    except Exception as exc:
        df_xmoai = pd.DataFrame()
        display(f'Error in XMOAI ({type(exc).__name__}: {exc}). Resuming...')

    # Store both result sets under the instance's original index label.
    results[X_current.index[0]] = pd.concat([df_dice, df_xmoai]).reset_index(drop=True)

Processing 0
Processing DiCE
Processing XMOAI


invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encoun

Processing 1
Processing DiCE
Processing XMOAI


invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encoun

Processing 2
Processing DiCE
Processing XMOAI


invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encoun

Processing 3
Processing DiCE
Processing XMOAI


invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encoun

Processing 4
Processing DiCE
Processing XMOAI


invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encoun

Processing 5
Processing DiCE
Processing XMOAI


invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encoun

Processing 6
Processing DiCE
Processing XMOAI


invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encoun

Processing 7
Processing DiCE
Processing XMOAI


invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encountered in power
invalid value encoun

In [None]:
# Persist the per-instance result tables together with the evaluated test rows
# (low-target instances first, then high-target ones).
# `pickle` is already imported in the notebook's import cell. The context
# manager makes sure the file is flushed and closed even if dumping fails.
with open('california_results.pkl', 'wb') as results_file:
    pickle.dump([results, X_test.iloc[np.concatenate([idx_low, idx_high])]], results_file)