# Anchor explanations on the Iris dataset

In [2]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from alibi.explainers import AnchorTabular

### Load iris dataset

In [5]:
dataset = load_iris()
feature_names = dataset.feature_names
class_names = list(dataset.target_names)
print(feature_names)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


Define training and test set

In [6]:
idx = 145
X_train,Y_train = dataset.data[:idx,:], dataset.target[:idx]
X_test, Y_test = dataset.data[idx+1:,:], dataset.target[idx+1:]

In [16]:
print(X_test)

[[6.3 2.5 5.  1.9]
 [6.5 3.  5.2 2. ]
 [6.2 3.4 5.4 2.3]
 [5.9 3.  5.1 1.8]]


### Train Random Forest model

In [7]:
np.random.seed(0)
clf = RandomForestClassifier(n_estimators=50)
clf.fit(X_train, Y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=50,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

Define predict function

In [8]:
predict_fn = lambda x: clf.predict_proba(x)

### Initialize and fit anchor explainer for tabular data

In [9]:
explainer = AnchorTabular(predict_fn, feature_names)

Discretize the ordinal features into quartiles

In [10]:
explainer.fit(X_train, disc_perc=[25, 50, 75])

### Getting an anchor

Below, we get an anchor for the prediction of the first observation in the test set. An anchor is a sufficient condition - that is, when the anchor holds, the prediction should be the same as the prediction for this instance.

In [11]:
idx = 0
print('Prediction: ', class_names[explainer.predict_fn(X_test[idx].reshape(1, -1))[0]])

Prediction:  virginica


We set the precision threshold to 0.95. This means that predictions on observations where the anchor holds will be the same as the prediction on the explained instance at least 95% of the time.

In [15]:
print(X_test[0])
print(explainer.predict_fn(X_test[0]))

[6.3 2.5 5.  1.9]


ValueError: Expected 2D array, got 1D array instead:
array=[6.3 2.5 5.  1.9].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [9]:
explanation = explainer.explain(X_test[idx], threshold=0.95)
print('Anchor: %s' % (' AND '.join(explanation['names'])))
print('Precision: %.2f' % explanation['precision'])
print('Coverage: %.2f' % explanation['coverage'])

Anchor: petal width (cm) > 1.80 AND sepal width (cm) <= 2.80
Precision: 0.99
Coverage: 0.07
