# ELI5

In [1]:
# Data preprocessing
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# ML algorithms
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# XAI
import eli5

In [2]:
# Load the Pima Indians diabetes dataset used to build the models.
# The CSV is read with explicit column names; the last one ('class') is the target label.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
# Every column except the trailing target is an input feature.
input_names = names[:-1]
df = read_csv("data/pima-indians-diabetes.csv", names=names)

In [3]:
# Split the dataset into test (33%) and train (67%) partitions.
# Features are every column except the target; the target is the 'class' column.
x = df.drop("class", axis=1)
y = df["class"]
# A fixed random_state makes the shuffle deterministic, so a fresh
# "Restart Kernel -> Run All" reproduces the same split (and therefore
# comparable accuracies and explanations further down).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=42
)

In [4]:
# Fit both classifiers on the training partition.
# Both estimators are stochastic (tie-breaking / feature sub-sampling / bootstrapping),
# so random_state is pinned to keep the fitted models, their accuracy and their
# ELI5 explanations reproducible across runs.

# Decision Tree Classifier
dt_model = DecisionTreeClassifier(random_state=42).fit(x_train, y_train)
# Random Forest: 100 trees, each split considering at most 3 features
rf_model = RandomForestClassifier(
    n_estimators=100, max_features=3, random_state=42
).fit(x_train, y_train)

### Decision Tree

In [5]:
# Measure the decision tree's accuracy on the held-out test partition.
y_predicted = dt_model.predict(x_test)
accuracy = accuracy_score(y_test, y_predicted)
# Report the score as a percentage.
accuracy_pct = accuracy * 100
print("Precisión: ", accuracy_pct, "%")

Precisión:  69.68503937007874 %


In [6]:
# Global interpretability: show the feature weights ELI5 reports for the
# fitted decision tree, labelled with the dataset's feature names.
eli5.show_weights(
    dt_model,
    feature_names=input_names,
)

Weight,Feature
0.2965,plas
0.1751,mass
0.1619,age
0.1141,pres
0.1089,pedi
0.0592,preg
0.0518,skin
0.0325,test


In [7]:
# Local interpretability: explain the decision tree's prediction for a
# single test row, showing each feature's contribution and its value.
i = 0  # positional index of the test row to explain
eli5.show_prediction(
    dt_model,
    x_test.iloc[i],
    feature_names=input_names,
    show_feature_values=True,
)

Contribution?,Feature,Value
0.654,<BIAS>,1.0
0.316,mass,42.9
0.256,pres,78.0
0.109,pedi,0.516
-0.143,age,36.0
-0.191,plas,151.0


### Random Forest


In [8]:
# Measure the random forest's accuracy on the held-out test partition.
y_predicted = rf_model.predict(x_test)
accuracy = accuracy_score(y_test, y_predicted)
# Report the score as a percentage.
accuracy_pct = accuracy * 100
print("Precisión: ", accuracy_pct, "%")

Precisión:  77.95275590551181 %


In [9]:
# Global interpretability: show the feature weights ELI5 reports for the
# fitted random forest, labelled with the dataset's feature names.
eli5.show_weights(
    rf_model,
    feature_names=input_names,
)

Weight,Feature
0.2815  ± 0.1349,plas
0.1637  ± 0.0932,mass
0.1378  ± 0.1007,age
0.1309  ± 0.0790,pedi
0.0856  ± 0.0607,pres
0.0776  ± 0.0672,preg
0.0661  ± 0.0735,test
0.0567  ± 0.0527,skin


In [10]:
# Local interpretability: explain the random forest's prediction for a
# single test row, showing each feature's contribution and its value.
i = 10  # positional index of the test row to explain
eli5.show_prediction(
    rf_model,
    x_test.iloc[i],
    feature_names=input_names,
    show_feature_values=True,
)

Contribution?,Feature,Value
0.654,<BIAS>,1.0
0.128,age,22.0
0.022,plas,119.0
0.006,pedi,0.259
-0.014,skin,27.0
-0.023,test,0.0
-0.024,pres,66.0
-0.033,preg,0.0
-0.066,mass,38.8
