# [Interpret](https://github.com/interpretml/interpret)

InterpretML is an open-source package that incorporates state-of-the-art machine learning interpretability techniques under one roof.    
With this package, you can train interpretable glassbox models and explain blackbox systems.    
InterpretML helps you understand your model's global behavior, or understand the reasons behind individual predictions.



In [5]:
import warnings

import vaex
from vaex.ml.datasets import load_iris

warnings.filterwarnings('ignore')

# Load the iris dataset and split it; `test` is kept aside and not modified in this cell.
df, test = load_iris().ml.train_test_split(test_size=0.2)
target = 'class_'  # name of the label column used for fitting and scoring below

# Feature engineering example: a derived column computed from two raw columns.
df['petal_ratio'] = df['petal_length'] / df['petal_width']
# Replace the integer class codes (0/1/2) with readable species names.
df[target] = df[target].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})

## Modeling

In [6]:
from interpret.glassbox import ExplainableBoostingClassifier
import numpy as np
from sklearn.metrics import accuracy_score

# Columns fed to the model, including the engineered 'petal_ratio'.
features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width', 'petal_ratio']

# The classifier is given the feature names explicitly because it is fitted
# on a plain array (`.values`) and a plain list of labels (`.tolist()`).
ebm = ExplainableBoostingClassifier(feature_names=features)
ebm.fit(df[features].values, df[target].tolist())


# add predictions to dataframe
@vaex.register_function(on_expression=False)
def predict(*columns):
    """Return the fitted EBM's prediction for every row.

    Each positional argument is one feature column; the columns are stacked
    and transposed so that every row of the matrix passed to ``ebm.predict``
    is one sample.
    """
    feature_matrix = np.array(columns).T
    return ebm.predict(feature_matrix)


# Register the function on the dataframe and store its result as the 'predictions' column.
df.add_function("predict", predict)
df["predictions"] = df.func.predict(*features)
# Accuracy is computed on `df`, the same frame the model was fitted on.
accuracy = accuracy_score(df[target].values, df['predictions'].values)

print(f"Model accuracy: {accuracy}")
df.head(2)

Model accuracy: 1.0


#,sepal_length,sepal_width,petal_length,petal_width,class_,petal_ratio,predictions
0,5.4,3.0,4.5,1.5,versicolor,3,versicolor
1,4.8,3.4,1.6,0.2,setosa,8,setosa


## Explain global model

In [3]:
from interpret import show

# Build the global (whole-model) explanation.
ebm_global = ebm.explain_global()
# Keep the 'overall' entry so it can be attached to the pipeline variables later.
# NOTE(review): `_internal_obj` is an underscore-prefixed attribute, presumably non-public — confirm it is stable across interpret versions.
feature_importance = ebm_global._internal_obj['overall']  # stored in pipeline.variables further down

show(ebm_global)

## Explain specific rows

In [4]:
from interpret import show

# Display local (per-row) explanations for the rows of `df`, passing the true labels alongside the features.
show(ebm.explain_local(df[features].values, df[target].tolist()))

We want to add the explanation as a column so that it is available in production as part of the API output.

In [5]:
from collections import defaultdict
import pyarrow as pa


@vaex.register_function(on_expression=False)
def explain(*columns):
    """Build a per-row, per-label breakdown of feature scores.

    Each positional argument is one feature column. The columns are stacked
    into a matrix with one sample per row and passed to ``ebm.explain_local``.
    For every row the result holds a mapping ``{label: {feature: score}}``.

    Returns a pyarrow array with one mapping per row, or an empty pyarrow
    array when the explainer yields no local explanations.
    """
    samples = np.array(columns).T
    local_explanations = ebm.explain_local(samples)._internal_obj['specific']
    if len(local_explanations) == 0:
        return pa.array([])
    # Label names are read from the first explanation and reused for all rows.
    labels = local_explanations[0]['meta']['label_names']
    rows = []
    for item in local_explanations:
        per_label = {}
        for feature, score in zip(item['names'], item['scores']):
            # `score` is indexed by label position: one value per label.
            for position, label in enumerate(labels):
                per_label.setdefault(label, {})[feature] = score[position]
        rows.append(per_label)
    return pa.array(rows)


# Register the function on the dataframe and store its result as a column.
df.add_function("explain", explain)
# NOTE(review): the column name is spelled "explaination"; renaming it would change the column consumers see, so it is left as is.
df["explaination"] = df.func.explain(*features)
df.head(2)

#,sepal_length,sepal_width,petal_length,petal_width,class_,petal_ratio,predictions,explaination
0,5.4,3.0,4.5,1.5,versicolor,3,versicolor,"""{'setosa': {'petal_length': -0.5370391194010578..."
1,4.8,3.4,1.6,0.2,setosa,8,setosa,"""{'setosa': {'petal_length': 1.6797752424061698,..."


Add the function to the dataframe so that it is available later for analysis on test data.    
This is especially important if you have feature engineering and you want to work with unseen data.

In [6]:
from interpret import show


@vaex.register_function(on_expression=False)
def show_explanation(*columns):
    """Display local EBM explanations for the given rows and return their labels.

    The positional arguments are the feature columns followed by the label
    column, which must be the last argument.

    Returns the label column as a NumPy array, so the call can be used as a
    dataframe expression.
    """
    *feature_columns, labels = columns
    # Stack only the features. Putting the string labels into the same array
    # would make NumPy promote every numeric feature value to a string.
    X = np.array(feature_columns).T
    y = np.array(labels)
    show(ebm.explain_local(X, y))
    return y


# Register the function on the dataframe; no column is created from it here.
# The trailing semicolon presumably suppresses the notebook's echo of the return value.
df.add_function("show_explanation", show_explanation);

## Production pipeline

In [9]:
from goldilox import Pipeline

# Build the production pipeline from the dataframe, including the columns and functions added above.
pipeline = Pipeline.from_vaex(df, description='An EBM model with explanations')
# pipeline.raw.pop('class_', None) # won't expect it in production (optional)

# Attach extra values to the pipeline so they can be read back after loading it.
pipeline.variables['feature_importance'] = feature_importance
pipeline.variables['accuracy'] = accuracy
pipeline.variables['features'] = features
# The post-analysis cell reads pipeline.variables['target'], so it must be stored too.
pipeline.variables['target'] = target

assert pipeline.validate()

In [42]:
print(f"Saved to: {pipeline.save('pipeline.pkl')}")
print(f"Check out the docs: http://127.0.0.1:5000/docs\n")

!gl serve pipeline.pkl

Saved to: pipeline.pkl
Check out the docs: http://127.0.0.1:5000/docs

[2021-12-14 14:44:51 +0100] [23500] [INFO] Starting gunicorn 20.1.0
[2021-12-14 14:44:51 +0100] [23500] [INFO] Listening at: http://127.0.0.1:5000 (23500)
[2021-12-14 14:44:51 +0100] [23500] [INFO] Using worker: uvicorn.workers.UvicornH11Worker
[2021-12-14 14:44:51 +0100] [23503] [INFO] Booting worker with pid: 23503
[2021-12-14 14:44:51 +0100] [23503] [INFO] Started server process [23503]
[2021-12-14 14:44:51 +0100] [23503] [INFO] Waiting for application startup.
[2021-12-14 14:44:51 +0100] [23503] [INFO] Application startup complete.
^C
[2021-12-14 14:45:12 +0100] [23500] [INFO] Handling signal: int
[2021-12-14 14:45:12 +0100] [23500] [INFO] Shutting down: Master


# Post analysis
So why did we save *show_explanation*?   
We can load the saved pipeline and try it locally on new data!
* TODO

In [50]:
from goldilox import Pipeline

# Reload the saved pipeline and read back the values attached to it.
pipeline = Pipeline.from_file('pipeline.pkl')
features = pipeline.variables['features']
# NOTE(review): this lookup needs a 'target' entry in pipeline.variables at save time — confirm it was stored.
target = pipeline.variables['target']
# Run the held-out `test` frame through the pipeline.
new_data = pipeline.inference(test)
# Show explanations for the first two rows; the label column is passed last.
new_data.head(2).func.show_explanation(*features, target)

INFO:interpret.visual.udash:Generating mini dash
INFO:interpret.visual.udash:Generated mini dash


INFO:interpret.visual.udash:Generating mini dash
INFO:interpret.visual.udash:Generated mini dash


INFO:interpret.visual.udash:Generating mini dash
INFO:interpret.visual.udash:Generated mini dash


INFO:interpret.visual.udash:Generating mini dash
INFO:interpret.visual.udash:Generated mini dash


INFO:interpret.visual.udash:Generating mini dash
INFO:interpret.visual.udash:Generated mini dash


INFO:interpret.visual.udash:Generating mini dash
INFO:interpret.visual.udash:Generated mini dash


INFO:interpret.visual.udash:Generating mini dash
INFO:interpret.visual.udash:Generated mini dash


INFO:interpret.visual.udash:Generating mini dash
INFO:interpret.visual.udash:Generated mini dash


INFO:interpret.visual.udash:Generating mini dash
INFO:interpret.visual.udash:Generated mini dash


Expression = show_explanation(petal_length, petal_width, sepal_length,...
Length: 30 dtype: object (expression)
-------------------------------------
 0  versicolor
 1  versicolor
 2  versicolor
 3   virginica
 4      setosa
     ...      
25  versicolor
26  versicolor
27      setosa
28      setosa
29   virginica

INFO:interpret.visual.udash:No overall plot to display: -1|ExplainableBoostingClassifier_103
INFO:interpret.visual.udash:No overall plot to display: -1|ExplainableBoostingClassifier_104
INFO:interpret.visual.udash:No overall plot to display: -1|ExplainableBoostingClassifier_105
INFO:interpret.visual.udash:No overall plot to display: -1|ExplainableBoostingClassifier_106
INFO:interpret.visual.udash:No overall plot to display: -1|ExplainableBoostingClassifier_107
INFO:interpret.visual.udash:No overall plot to display: -1|ExplainableBoostingClassifier_110
INFO:interpret.visual.udash:No overall plot to display: -1|ExplainableBoostingClassifier_108
INFO:interpret.visual.udash:No overall plot to display: -1|ExplainableBoostingClassifier_109
INFO:interpret.visual.udash:No overall plot to display: -1|ExplainableBoostingClassifier_111
