# Anchor explanations on the Iris dataset

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from alibi.explainers import AnchorTabular
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics

In [2]:
data = pd.read_csv('titanic.csv')
data.rename(columns={'Survived': 'class'}, inplace=True)
data['Sex'] = data['Sex'].map({'male':0,'female':1})
data['Embarked'] = data['Embarked'].map({'S':0,'C':1,'Q':2})
data['Relatives'] = data['SibSp'] + data['Parch']

data = data.drop(['PassengerId', 'Name','Ticket','Cabin', 'SibSp', 'Parch'], axis=1)
data = data.dropna()

f = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked', 'Relatives']

features = data.drop('class', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, data['class'].values, random_state=None)

In [3]:
print(training_features)

[[  3.       0.      19.       0.       0.       0.    ]
 [  1.       0.      64.     263.       0.       5.    ]
 [  1.       0.      35.      26.2875   0.       0.    ]
 ...
 [  3.       0.      32.       7.75     2.       0.    ]
 [  3.       0.      28.       7.925    0.       2.    ]
 [  1.       1.      15.     211.3375   0.       1.    ]]


In [5]:
model = svm.SVC(gamma=0.001, C=100., probability=True)
    
model.fit(training_features, training_target)
certainty = metrics.accuracy_score(testing_target, model.predict(testing_features))
print("Accuracy:", certainty)

Accuracy: 0.797752808988764


In [6]:
predict_fn = lambda x: model.predict_proba(x)

In [10]:
predict_fn(np.array([[3, 0, 47, 7.25, 0, 0]]))

array([[0.79716029, 0.20283971]])

In [11]:
category_map = {0: ["1", "2", "3"], 1: ["0","1"], 4: ["0", "1", "2"]}

### Initialize and fit anchor explainer for tabular data

In [12]:
explainer = AnchorTabular(predict_fn, feature_names = f, categorical_names = category_map)

Discretize the ordinal features into quartiles

In [79]:
explainer.fit(training_features, disc_perc=[25, 50, 75])

### Getting an anchor

Below, we get an anchor for the prediction of the first observation in the test set. An anchor is a sufficient condition - that is, when the anchor holds, the prediction should be the same as the prediction for this instance.

In [84]:
idx = 0
#print('Prediction: ', explainer.predict_fn([np.array([3, 0, 50, 10.0, 0, 0])]))
print(model.predict_proba(np.array([1, 0, 60, 50, 0, 0]).reshape(1, -1)))
#print(testing_features[0])
print(np.array([3, 0, 22, 20, 0, 0]))

[[0.79364421 0.20635579]]
[ 3  0 22 20  0  0]


We set the precision threshold to 0.95. This means that predictions on observations where the anchor holds will be the same as the prediction on the explained instance at least 95% of the time.

In [92]:
explanation = explainer.explain(np.array([1, 0, 60, 50, 0, 0]), threshold=0.95)
print('Anchor: %s' % (' AND '.join(explanation['names'])))
print('Precision: %.2f' % explanation['precision'])
print('Coverage: %.2f' % explanation['coverage'])

Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible anchor.
Anchor: Sex = 0 AND Relatives <= 0.00 AND Embarked = 0
Precision: 0.95
Coverage: 0.35
