# ELI5

In [1]:
# Data preprocessing
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# ML algorithms
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# XAI
import eli5

In [2]:
# Load the data used to build the models.
# The CSV is read with explicit column names; the last one is the target label.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
input_names = names[:-1]  # every column name except the trailing class label
df = read_csv("data/classification/pima-indians-diabetes.csv", names=names)

In [3]:
# Split the dataset into train (67%) and test (33%) partitions.
x = df.drop(columns="class")  # feature columns only
y = df["class"]               # target label
# A fixed random_state keeps the shuffle, and therefore the resulting
# partitions, identical across kernel restarts and re-runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=42
)

In [4]:
# Fit both classifiers on the training partition.
# Each estimator is given a fixed random_state so that repeated runs on the
# same training data produce the same fitted model.

# Decision Tree Classifier
dt_model = DecisionTreeClassifier(random_state=42).fit(x_train, y_train)
# Random Forest: 100 trees, each split considering at most 3 features
rf_model = RandomForestClassifier(
    n_estimators=100, max_features=3, random_state=42
).fit(x_train, y_train)

### Decision Tree

In [5]:
# Measure the decision tree's accuracy on the test partition.
y_predicted = dt_model.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_predicted)
# Reported as a percentage.
print("Precisión: ", accuracy * 100, "%")

Precisión:  73.62204724409449 %


In [6]:
# Model-level interpretability: display the weight eli5 reports for each
# input feature of the fitted decision tree (labelled with input_names).
eli5.show_weights(dt_model, feature_names=input_names)

Weight,Feature
0.275,plas
0.2207,mass
0.1099,preg
0.101,pedi
0.0913,age
0.0868,pres
0.082,skin
0.0334,test


In [7]:
# Prediction-level interpretability: explain the decision tree's output for a
# single test row, selected by its position i in x_test.
i = 0
eli5.show_prediction(
    dt_model,
    x_test.iloc[i],
    feature_names=input_names,
    show_feature_values=True,  # include the row's feature values in the table
)

Contribution?,Feature,Value
0.619,mass,39.5
0.35,<BIAS>,1.0
0.123,age,38.0
-0.032,plas,106.0
-0.06,pedi,0.286


### Random Forest


In [8]:
# Measure the random forest's accuracy on the test partition.
y_predicted = rf_model.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_predicted)
# Reported as a percentage.
print("Precisión: ", accuracy * 100, "%")

Precisión:  75.98425196850394 %


In [9]:
# Model-level interpretability: display the weight eli5 reports for each
# input feature of the fitted random forest (labelled with input_names).
eli5.show_weights(rf_model, feature_names=input_names)

Weight,Feature
0.2733  ± 0.1219,plas
0.1818  ± 0.0973,mass
0.1346  ± 0.0727,pedi
0.1260  ± 0.0899,age
0.0774  ± 0.0549,pres
0.0769  ± 0.0590,preg
0.0751  ± 0.0674,skin
0.0550  ± 0.0547,test


In [10]:
# Prediction-level interpretability: explain the random forest's output for a
# single test row, selected by its position i in x_test.
i = 10
eli5.show_prediction(
    rf_model,
    x_test.iloc[i],
    feature_names=input_names,
    show_feature_values=True,  # include the row's feature values in the table
)

Contribution?,Feature,Value
0.654,<BIAS>,1.0
0.189,plas,95.0
0.074,pedi,0.161
0.04,preg,4.0
0.018,skin,0.0
0.008,test,0.0
-0.007,pres,64.0
-0.038,mass,32.0
-0.058,age,31.0
