# ELI5

In [1]:
# Data preprocessing
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# ML algorithms
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# XAI
import eli5

In [2]:
# Load the Pima Indians diabetes CSV; column names are supplied explicitly via `names`
names = [
    "preg", "plas", "pres", "skin", "test", "mass", "pedi", "age",  # input features
    "class",  # target column
]
input_names = names[:-1]  # every column name except the trailing "class"
df = read_csv("data/classification/pima-indians-diabetes.csv", names=names)

In [3]:
# Split the dataset into train (67%) and test (33%)
x = df.drop(columns="class")  # feature matrix: every column except the target
y = df["class"]               # target vector
# A fixed random_state makes the shuffle deterministic, so the same rows land in
# train/test on every Restart & Run All (without it each run yields a different split).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=42
)

In [4]:
# Both estimators are randomized; a fixed random_state makes fitting deterministic
# for a given training set, so re-running this cell yields the same fitted models.

# Decision Tree Classifier
dt_model = DecisionTreeClassifier(random_state=42).fit(x_train, y_train)
# Random Forest: 100 trees, 3 candidate features considered at each split
rf_model = RandomForestClassifier(
    n_estimators=100, max_features=3, random_state=42
).fit(x_train, y_train)

### Decision Tree

In [5]:
# Evaluate the decision tree on the test split.
# NOTE: the printed label reads "Precisión", but the value is accuracy_score (accuracy), as a percentage.
y_predicted = dt_model.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_predicted)
print("Precisión: ", 100 * accuracy, "%")

Precisión:  69.29133858267717 %


In [6]:
# Model-level interpretability: per-feature weights of the fitted decision tree,
# labelled with the input column names
eli5.show_weights(
    dt_model,
    feature_names=input_names,
)

Weight,Feature
0.2477,plas
0.1706,mass
0.1615,age
0.158,pedi
0.1028,pres
0.076,preg
0.0721,test
0.0112,skin


In [7]:
# Prediction-level interpretability: explain the decision tree's prediction for one test row
i = 0  # positional index (into x_test) of the row to explain
eli5.show_prediction(
    dt_model,
    x_test.iloc[i],
    feature_names=input_names,
    show_feature_values=True,  # also display the row's value for each feature
)

Contribution?,Feature,Value
0.644,<BIAS>,1.0
0.167,age,21.0
0.127,plas,101.0
0.052,mass,21.0
0.011,pedi,0.252


### Random Forest


In [8]:
# Evaluate the random forest on the test split.
# NOTE: the printed label reads "Precisión", but the value is accuracy_score (accuracy), as a percentage.
y_predicted = rf_model.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_predicted)
print("Precisión: ", 100 * accuracy, "%")

Precisión:  79.13385826771653 %


In [9]:
# Model-level interpretability: per-feature weights of the fitted random forest,
# labelled with the input column names
eli5.show_weights(
    rf_model,
    feature_names=input_names,
)

Weight,Feature
0.2480  ± 0.1162,plas
0.1526  ± 0.0901,age
0.1478  ± 0.0873,mass
0.1461  ± 0.0796,pedi
0.0896  ± 0.0631,pres
0.0769  ± 0.0636,preg
0.0700  ± 0.0625,test
0.0691  ± 0.0561,skin


In [10]:
# Prediction-level interpretability: explain the random forest's prediction for one test row
i = 10  # positional index (into x_test) of the row to explain
eli5.show_prediction(
    rf_model,
    x_test.iloc[i],
    feature_names=input_names,
    show_feature_values=True,  # also display the row's value for each feature
)

Contribution?,Feature,Value
0.64,<BIAS>,1.0
0.107,pedi,0.186
0.08,mass,26.8
0.053,pres,80.0
0.021,skin,0.0
0.02,test,0.0
-0.022,preg,5.0
-0.054,plas,132.0
-0.084,age,69.0
