## Explain Your Model with Microsoft's InterpretML
* Install the library first: `pip install -U interpret`

In [1]:
from interpret.glassbox import ExplainableBoostingClassifier
from sklearn.model_selection import train_test_split

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Seed NumPy's global generator; train_test_split below is called without
# its own random_state, so this is what makes the split repeatable.
np.random.seed(0)

# Read the CSV and cast the target column to integers in one step.
df = pd.read_csv('../data/winequality-red.csv').astype({'quality': int})

# Columns used as model inputs; 'quality' is the prediction target.
feature_columns = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
]

Y = df['quality']
X = df[feature_columns]
X_featurenames = X.columns

# Hold out 20% of the rows as the test split.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

FileNotFoundError: [Errno 2] No such file or directory: '../date/winequality-red.csv'

## (A) Explore the data

In [None]:
from interpret import show
from interpret.data import Marginal

# Build the Marginal data explanation from the training split only,
# then render it with interpret's viewer.
marginal_explainer = Marginal()
marginal = marginal_explainer.explain_data(X_train, Y_train, name='Train Data')
show(marginal)

## (B) Train the Explainable Boosting Machine (EBM)

In [23]:
from interpret.glassbox import ExplainableBoostingRegressor, LinearRegression, RegressionTree

# `seed` was passed to every estimator below but no earlier cell defines it,
# so a fresh "Restart & Run All" stopped here with a NameError. Use the same
# value that the data-loading cell gives to np.random.seed.
seed = 0

# Two other glassbox models, fitted on the same training split so they can
# be compared with the EBM later in the notebook.
lr = LinearRegression(random_state=seed)
lr.fit(X_train, Y_train)

rt = RegressionTree(random_state=seed)
rt.fit(X_train, Y_train)

# The EBM is fitted last so that its repr is the value the cell displays.
ebm = ExplainableBoostingRegressor(random_state=seed)
ebm.fit(X_train, Y_train)
# For Classifier, use ebm = ExplainableBoostingClassifier()

ExplainableBoostingRegressor(binning_strategy='uniform', data_n_episodes=2000,
                             early_stopping_run_length=50,
                             early_stopping_tolerance=1e-05,
                             feature_names=['fixed acidity', 'volatile acidity',
                                            'citric acid', 'residual sugar',
                                            'chlorides', 'free sulfur dioxide',
                                            'total sulfur dioxide', 'density',
                                            'pH', 'sulphates', 'alcohol'],
                             feature_step_n_inner_bags=0,
                             feature_types=['c...ntinuous',
                                            'continuous', 'continuous',
                                            'continuous', 'continuous',
                                            'continuous', 'continuous',
                                            'continuous', 'continuous',
   

## (C) How Does the EBM Model Perform?

In [35]:
from interpret import show
from interpret.perf import RegressionPerf

# Score each fitted model on the held-out test split. The three *_perf names
# are kept because the dashboard cell at the end of the notebook uses them.
ebm_perf = RegressionPerf(ebm.predict).explain_perf(X_test, Y_test, name='EBM')
lr_perf = RegressionPerf(lr.predict).explain_perf(X_test, Y_test, name='Linear Regression')
rt_perf = RegressionPerf(rt.predict).explain_perf(X_test, Y_test, name='Regression Tree')

# Render one performance view per model: EBM first, then the two others.
for perf_explanation in (ebm_perf, lr_perf, rt_perf):
    show(perf_explanation)

## (D) Global Interpretability - What the Model Says for All Data

In [24]:
# Ask the fitted EBM for its global explanation and render it.
# `ebm_global` is reused by the dashboard cell at the end of the notebook.
ebm_global = ebm.explain_global(name="EBM")
show(ebm_global)

## (E) Local Interpretability - What the Model Says for Individual Data

In [25]:
# Explain the model's predictions for the first five test rows only;
# the matching true targets are passed alongside them.
first_rows = X_test.head(5)
first_targets = Y_test.head(5)
ebm_local = ebm.explain_local(first_rows, first_targets, name='EBM')
show(ebm_local)

## (F) Put It All in One Dashboard - This is the Best

In [36]:
# The dashboard lists global explanations for the linear and tree models, but
# no earlier cell creates them, so this cell used to fail with a NameError.
# Build them here from the already-fitted `lr` and `rt` models.
lr_global = lr.explain_global(name='Linear Regression')
rt_global = rt.explain_global(name='Regression Tree')

# Passing a list to show() puts every explanation in one dashboard.
show([marginal, lr_global, lr_perf, rt_global, rt_perf, ebm_perf, ebm_global, ebm_local])