# Interpret (Microsoft)

In [1]:
# Pandas
import pandas as pd

# Some sklearn tools for preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Tree based algorithms
from sklearn.ensemble import RandomForestClassifier

# XAI
from interpret.glassbox import ExplainableBoostingClassifier
from interpret import show

In [2]:
# Load the dataset. Column names are passed explicitly via `names`; the last
# entry, "class", is the label column and every other entry is a feature.
names = [
    "preg", "plas", "pres", "skin",
    "test", "mass", "pedi", "age",
    "class",
]
features = [column for column in names if column != "class"]
df = pd.read_csv("data/classification/pima-indians-diabetes.csv", names=names)
# Preview the first rows as a quick sanity check of the parsed columns.
df.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
# Number of rows per label value, to see how balanced the two classes are.
count_class = df.groupby(by="class").size()
count_class

class
0    500
1    268
dtype: int64

In [4]:
# Split the dataset into train and test partitions.
# X holds every column except the label; Y is the "class" label column.
X = df.drop("class", axis=1)
Y = df["class"]
# Pin the seed so that a fresh "Restart & Run All" produces the same split
# (and therefore the same fitted model and explanations). Without it,
# train_test_split shuffles differently on every run.
# NOTE(review): the classes are imbalanced (500 vs 268 rows); consider also
# passing stratify=Y if the split should preserve that ratio.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.33, random_state=42
)

In [5]:
# Build an Explainable Boosting Classifier with its default hyperparameters
# and fit it on the training partition. The fit call is left as the cell's
# last expression so the notebook displays the fitted estimator's repr.
ebm = ExplainableBoostingClassifier()
ebm.fit(x_train, y_train)

ExplainableBoostingClassifier(binning_strategy='uniform', data_n_episodes=2000,
                              early_stopping_run_length=50,
                              early_stopping_tolerance=1e-05,
                              feature_names=['preg', 'plas', 'pres', 'skin',
                                             'test', 'mass', 'pedi', 'age'],
                              feature_step_n_inner_bags=0,
                              feature_types=['continuous', 'continuous',
                                             'continuous', 'continuous',
                                             'continuous', 'continuous',
                                             'continuous', 'continuous'],
                              holdout_size=0.15, holdout_split=0.15,
                              interactions=0, learning_rate=0.01,
                              max_tree_splits=2, min_cases_for_splits=2,
                              n_estimators=16, n_jobs=-2, random_state=42,
         

Understand the model (global explanation):

In [6]:
# Ask the fitted EBM for its model-level (global) explanation and render it
# with interpret's `show` helper.
ebm_global = ebm.explain_global()
show(ebm_global)

Understand individual predictions (local explanations):

In [7]:
# Ask the fitted EBM for per-row (local) explanations of the held-out test
# partition; the true labels are passed alongside the features. The result
# is rendered with interpret's `show` helper.
ebm_local = ebm.explain_local(x_test, y_test)
show(ebm_local)