# Anchor explanations on the Titanic dataset

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from alibi.explainers import AnchorTabular
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score

In [2]:
# Load the Titanic passenger table and derive the modelling frame in one chain
# (no in-place mutation): 'Survived' becomes the target column 'class', the
# coded categoricals get readable labels, and SibSp + Parch collapse into a
# single 'Relatives' count. Rows with any missing value are dropped.
data = (
    pd.read_csv('titanic.csv')
    .rename(columns={'Survived': 'class'})
    .assign(
        Sex=lambda df: df['Sex'].map({'male': 'Male', 'female': 'Female'}),
        Embarked=lambda df: df['Embarked'].map(
            {'S': 'Southampton', 'C': 'Cherbourg', 'Q': 'Queenstown'}),
        Pclass=lambda df: df['Pclass'].map({1: 'First', 2: 'Second', 3: 'Third'}),
        Relatives=lambda df: df['SibSp'] + df['Parch'],
    )
    .drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin', 'SibSp', 'Parch'])
    .dropna()
)

# Feature names in the positional order used when rows are passed around as
# plain arrays.
f = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked', 'Relatives']

features = data.drop(columns='class')
# Guard the positional contract: a different column order would silently
# mislabel features once rows are converted to/from arrays.
assert list(features.columns) == f, "feature columns are not in the expected order"

# Fixed seed so the split (and every number derived from it) is reproducible
# under Restart & Run All; the original random_state=None reshuffled each run.
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, data['class'].values, random_state=42)

In [4]:
# Inspect the training split of the feature frame.
print(training_features)

Pclass     Sex   Age      Fare     Embarked  Relatives
483   Third  Female  63.0    9.5875  Southampton          0
688   Third    Male  18.0    7.7958  Southampton          0
336   First    Male  29.0   66.6000  Southampton          1
676   Third    Male  24.5    8.0500  Southampton          0
856   First  Female  45.0  164.8667  Southampton          2
..      ...     ...   ...       ...          ...        ...
757  Second    Male  18.0   11.5000  Southampton          0
104   Third    Male  37.0    7.9250  Southampton          2
155   First    Male  51.0   61.3792    Cherbourg          1
640   Third    Male  20.0    7.8542  Southampton          0
714  Second    Male  52.0   13.0000  Southampton          0

[534 rows x 6 columns]


In [5]:
# Column groups: each group is routed to its own preprocessing pipeline.
categorical_features = ['Pclass', 'Sex', 'Embarked']
numeric_features = ['Age', 'Fare', 'Relatives']

# Numeric columns go through a MinMaxScaler.
numeric_transformer = Pipeline(steps=[('scaler', MinMaxScaler())])

# Categorical columns are one-hot encoded, with handle_unknown='ignore'.
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

# Apply each pipeline to its column group.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

In [7]:
# End-to-end estimator: column preprocessing followed by a support-vector
# classifier (constructed with probability=True).
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', svm.SVC(probability=True)),
    ]
)

In [8]:
# Fit the preprocessing + classifier pipeline on the training split.
model.fit(training_features, training_target)

Pipeline(memory=None,
         steps=[('preprocessor',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('num',
                                                  Pipeline(memory=None,
                                                           steps=[('scaler',
                                                                   MinMaxScaler(copy=True,
                                                                                feature_range=(0,
                                                                                               1)))],
                                                           verbose=False),
                                                  ['Age', 'Fare', 'Relatives']),
                                                 ('cat',
                                                  P

In [9]:
# Held-out accuracy of the fitted pipeline. The classifier is svm.SVC, so the
# printed label says SVC (it previously read "XGB").
print("SVC {}".format(accuracy_score(testing_target, model.predict(testing_features))))

XGB 0.7921348314606742


In [10]:
def adapter(x):
    """Decode an integer-encoded feature array into a labelled DataFrame.

    The array's columns are named after the module-level list ``f``; the
    'Sex', 'Embarked' and 'Pclass' columns are then translated from their
    integer codes back to string labels. The remaining columns are passed
    through unchanged.

    Parameters
    ----------
    x : array-like
        Encoded rows, one column per entry of ``f``.

    Returns
    -------
    pandas.DataFrame
        The same rows with categorical codes replaced by labels.
    """
    code_to_label = {
        'Sex': {0: 'Male', 1: 'Female'},
        'Embarked': {0: 'Southampton', 1: 'Cherbourg', 2: 'Queenstown'},
        'Pclass': {0: 'First', 1: 'Second', 2: 'Third'},
    }
    frame = pd.DataFrame(data=x, columns = f)
    for column, labels in code_to_label.items():
        frame[column] = frame[column].map(labels)
    return frame

In [11]:
def reverse_adapter(p):
    """Encode a labelled passenger DataFrame as a float matrix.

    The 'Sex', 'Embarked' and 'Pclass' columns are translated from string
    labels to integer codes; all other columns are taken as they are. The
    input frame is not modified.

    Parameters
    ----------
    p : pandas.DataFrame
        Frame containing at least the 'Sex', 'Embarked' and 'Pclass' columns.

    Returns
    -------
    numpy.ndarray
        2-D float64 array with the same row and column order as ``p``.
        A label missing from the tables below comes out as NaN
        (``Series.map`` semantics).
    """
    d = p.copy()  # work on a copy so the caller's frame keeps its labels
    d['Sex'] = d['Sex'].map({'Male': 0, 'Female': 1})
    d['Embarked'] = d['Embarked'].map({'Southampton': 0, 'Cherbourg': 1, 'Queenstown': 2})
    d['Pclass'] = d['Pclass'].map({'First': 0, 'Second': 1, 'Third': 2})
    # `np.float` was only an alias for the builtin and was removed in
    # NumPy 1.24; the builtin `float` gives the same float64 result.
    return d.to_numpy().astype(float)

In [12]:
def predict_fn(x):
    """Predict class labels for integer-encoded rows.

    The rows are decoded with ``adapter`` and passed to ``model.predict``,
    so the result is hard class labels. Calling ``model.predict_proba`` on
    the decoded frame instead would return class probabilities.
    """
    return model.predict(adapter(x))

In [13]:
# Sanity check: decode a single integer-encoded row back into labelled form.
adapter(np.array([[2, 0, 47, 7.25, 0, 0]]))

Unnamed: 0,Pclass,Sex,Age,Fare,Embarked,Relatives
0,Third,Male,47.0,7.25,Southampton,0.0


In [14]:
# Sanity check: run the wrapped model on one encoded row
# (Pclass code 1 = Second, Sex code 1 = Female, Embarked code 0 = Southampton).
predict_fn(np.array([[1, 1, 47, 7.25, 0, 0]]))

array([1])

### Initialize and fit anchor explainer for tabular data

In [15]:
# Labels for the categorical features, keyed by the feature's 0-based column
# position in `f`. Within each list, position i is the label for integer
# code i, so the codes must start at 0.
category_map = {
    0: ['First', 'Second', 'Third'],
    1: ['Male', 'Female'],
    4: ['Southampton', 'Cherbourg', 'Queenstown'],
}

In [16]:
# Build the anchor explainer around the wrapped model, giving it the feature
# names and the label lists of the categorical columns.
explainer = AnchorTabular(predict_fn, feature_names = f, categorical_names = category_map)

Discretize the ordinal features into quartiles

In [17]:
# Encode the labelled training frame as the numeric matrix the explainer is fitted on.
anchor_training = reverse_adapter(training_features)


In [18]:
# Inspect the encoded training matrix.
print(anchor_training)

[[ 2.      1.     63.      9.5875  0.      0.    ]
 [ 2.      0.     18.      7.7958  0.      0.    ]
 [ 0.      0.     29.     66.6     0.      1.    ]
 ...
 [ 0.      0.     51.     61.3792  1.      1.    ]
 [ 2.      0.     20.      7.8542  0.      0.    ]
 [ 1.      0.     52.     13.      0.      0.    ]]


In [19]:
# Fit the explainer on the encoded training data; disc_perc lists the
# percentile cut points (here the quartiles) used for discretization.
explainer.fit(anchor_training, disc_perc=[25, 50, 75])

AnchorTabular(meta={
    'name': 'AnchorTabular',
    'type': ['blackbox'],
    'explanations': ['local'],
    'params': {'seed': None, 'disc_perc': [25, 50, 75]}
})

### Getting an anchor

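Below, we get an anchor for the prediction of a single hand-specified passenger, given in the integer encoding (second class, female, aged 20, fare 80, embarked at Cherbourg, travelling with no relatives). An anchor is a sufficient condition - that is, when the anchor holds, the prediction should be the same as the prediction for this instance.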

In [26]:
# Explain one hand-written, integer-encoded passenger row.
explanation = explainer.explain(
    np.array([[1, 1, 20, 80, 1, 0]]),
    threshold=0.90,
    max_anchor_size=3,
    batch_size=2000,
    tau=0.01,
)

# Full explanation object first, then the anchor as one readable conjunction.
# The printed object also carries the anchor's precision and coverage.
print(explanation)
print('Anchor: %s' % ' AND '.join(explanation['data']['anchor']))

Explanation(meta={
    'name': 'AnchorTabular',
    'type': ['blackbox'],
    'explanations': ['local'],
    'params': {
        'seed': None,
        'disc_perc': [25, 50, 75],
        'threshold': 0.9,
        'delta': 0.1,
        'tau': 0.01,
        'batch_size': 2000,
        'coverage_samples': 10000,
        'beam_size': 1,
        'stop_on_first': False,
        'max_anchor_size': 3,
        'min_samples_start': 100,
        'n_covered_ex': 10,
        'binary_cache_size': 10000,
        'cache_margin': 1000,
        'verbose': False,
        'verbose_every': 1,
        'kwargs': {}
    }
}, data={
    'anchor': ['Sex = Female', 'Embarked = Cherbourg'],
    'precision': numpy.float64(1.0),
    'coverage': numpy.float64(0.36891385767790263),
    'raw': {
        'feature': [1, 4],
        'mean': [numpy.float64(0.5833887595610243), numpy.float64(1.0)],
        'precision': [numpy.float64(0.5833887595610243), numpy.float64(1.0)],
        'coverage': [
            numpy.float64(0

In [28]:

names = explanation['data']['anchor']

# Convert the cumulative precision sequence into per-predicate increments
# (first entry kept as is, every later entry minus its predecessor), rounded
# to two decimals. These values are not rendered in the rule string.
precision = np.diff(np.asarray(explanation['raw']['precision']), prepend=0.0)
precision = [round(gain, 2) for gain in precision.tolist()]

# Join the anchor predicates into a single human-readable conjunction.
rule = " AND ".join(names)
print(rule)

Sex = Female AND Embarked = Cherbourg


In [107]:
# Dump the full explanation object.
# NOTE(review): the saved output under this cell has a different structure
# (a plain dict with a 'names' key) from the Explanation printed earlier -
# presumably left over from an older run; re-execute to refresh it.
print(explanation)

{'names': ['Sex = Male', 'Pclass = Second', 'Embarked = Cherbourg'], 'precision': 0.9298701298701298, 'coverage': 0.0081, 'raw': {'feature': [1, 0, 4], 'mean': [0.8487689038870141, 0.8850762527233116, 0.9298701298701298], 'precision': [0.8487689038870141, 0.8850762527233116, 0.9298701298701298], 'coverage': [0.6342, 0.1357, 0.0081], 'examples': [{'covered': array([[  0.    ,   0.    ,  49.    , 110.8833,   1.    ,   2.    ],
       [  1.    ,   0.    ,  37.    ,  26.    ,   0.    ,   1.    ],
       [  2.    ,   0.    ,  40.5   ,  14.5   ,   0.    ,   2.    ],
       [  1.    ,   0.    ,  34.    ,  32.5   ,   0.    ,   2.    ],
       [  0.    ,   0.    ,  45.5   ,  28.5   ,   0.    ,   0.    ],
       [  2.    ,   0.    ,  22.    ,  10.5167,   0.    ,   0.    ],
       [  2.    ,   0.    ,  24.    ,   7.05  ,   0.    ,   0.    ],
       [  0.    ,   0.    ,  25.    ,  55.4417,   1.    ,   1.    ],
       [  2.    ,   0.    ,  41.    ,  20.2125,   0.    ,   2.    ],
       [  0.    ,  