In [1]:
import trustyai

import pyarrow.jvm as pvjm
trustyai.init()

%load_ext autoreload

%autoreload 2

from trustyai.explainers import SHAPExplainer, LimeExplainer, CounterfactualExplainer
from trustyai.local.counterfactual import simple_prediction
from trustyai.local.counterfactual import counterfactual_prediction
from trustyai.model import feature, output, PredictionInput, PredictionOutput
from trustyai.utils import TestUtils
import pandas as pd
import numpy as np
import time
from trustyai.model import Model, ArrowModel
from lime import lime_tabular

== Dev Version ==


SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.


# Set Global Feature Count

In [None]:
# Number of features per synthetic input row. It sets the length of `weights`
# and the column count of the background / explained arrays built below, and
# the SHAP cell only runs the official KernelExplainer when fs < 20.
fs = 15

# Setup Models

In [2]:
# Sample / step budget handed to every explainer in the benchmark cells below.
nsamples = 10000

# One name per feature column: f0, f1, ..., f{fs-1}.
feature_names = ['f{}'.format(i) for i in range(fs)]

# Random linear-model weights plus random background (100 rows) and
# to-be-explained (2 rows) inputs, each row having `fs` features.
weights = np.random.rand(fs)
bg_arr = np.random.rand(100, fs)
exp_arr = np.random.rand(2, fs)

# TrustyAI PredictionInput views of the same arrays.
bg_pi = [PredictionInput([feature(name=name, value=x, dtype="number") for name, x in zip(feature_names, row)]) for row in bg_arr]
exp_pi = [PredictionInput([feature(name=name, value=x, dtype="number") for name, x in zip(feature_names, row)]) for row in exp_arr]

# pandas views of the same arrays. Columns carry the real feature names (the
# un-formatted 'f{}' template previously gave every column the same label), and
# exp_pd is built from exp_arr rather than from the background data.
bg_pd = pd.DataFrame(bg_arr, columns=feature_names)
exp_pd = pd.DataFrame(exp_arr, columns=feature_names)

In [8]:
# Module-level switch read by the two-output models: True -> both outputs are
# numbers, False -> the second output is text.
numeric = True

# Two-output models for TrustyAI, TrustyAI-Arrow, and Official
def model_function(inputs):
    """Linear two-output model over a sequence of PredictionInput objects.

    Each input's feature values are read as numbers and dotted with the
    module-level ``weights``. One PredictionOutput is returned per input:

    * ``numeric`` truthy: outputs "dot" and "dot2", both the dot product.
    * ``numeric`` falsy: outputs "dot" (the dot product) and "str" (the
      first feature value rendered with ``str``).
    """
    rows = [[feat.value.as_number() for feat in pi.features] for pi in inputs]
    input_data = np.array(rows)
    dot_products = np.dot(input_data, weights)

    if numeric:
        return [
            PredictionOutput([
                output(name="dot", dtype='number', value=value),
                output(name="dot2", dtype='number', value=value),
            ])
            for value in dot_products
        ]

    first_feature_text = [str(x) for x in input_data[:, 0]]
    return [
        PredictionOutput([
            output(name="dot", dtype='number', value=value),
            output(name="str", dtype='text', value=text),
        ])
        for value, text in zip(dot_products, first_feature_text)
    ]

def model_function_arrow(inputs):
    """Two-output linear model for the Arrow path.

    ``inputs`` is used as a pandas DataFrame (``.dot`` and column access by
    the name 'f0'); presumably one row per sample -- confirm against
    ArrowModel. Returns a DataFrame with a 'dot' column plus either 'dot2'
    (same values) when the module-level ``numeric`` is truthy, or 'str'
    (column 'f0' rendered with ``str``) otherwise.
    """
    dot_products = inputs.dot(weights)
    if numeric:
        return pd.DataFrame({'dot': dot_products, 'dot2': dot_products})

    first_feature_text = inputs['f0'].apply(str)
    return pd.DataFrame({'dot': dot_products, 'str': first_feature_text})

def model_function_pure_numpy(inputs):
    """Two-output linear model on raw arrays, used with the official SHAP library.

    Dots ``inputs`` with the module-level ``weights`` and returns that result
    twice, stacked along axis 1 (one column per output).
    """
    dot_products = np.dot(inputs, weights)
    return np.stack((dot_products, dot_products), axis=1)
    
# Single-output models for TrustyAI, TrustyAI-Arrow, and Official
def model_function_one_out(inputs):
    """Linear single-output model over a sequence of PredictionInput objects.

    Reads every feature value as a number, dots each row with the
    module-level ``weights`` and returns one PredictionOutput per input
    holding a single numeric output named "dot".
    """
    rows = [[feat.value.as_number() for feat in pi.features] for pi in inputs]
    dot_products = np.dot(np.array(rows), weights)
    return [
        PredictionOutput([output(name="dot", dtype='number', value=value)])
        for value in dot_products
    ]
    
def model_function_one_out_arrow(inputs):
    """Single-output linear model for the Arrow path.

    Calls ``inputs.dot`` with the module-level ``weights`` and returns the
    result as the only column, 'dot', of a new DataFrame.
    """
    dot_products = inputs.dot(weights)
    return pd.DataFrame({'dot': dot_products})
    
def model_function_one_out_pure_numpy(inputs):
    """Single-output linear model on raw arrays, used with the official LIME library.

    Returns ``np.dot(inputs, weights)`` using the module-level ``weights``.
    """
    dot_products = np.dot(inputs, weights)
    return dot_products
    
# Wrap the two-output callables: `model` is passed to the explainers'
# explain(...) calls and `amodel` to their explainArrow(...) calls below.
model = Model(model_function)
amodel = ArrowModel(model_function_arrow)

# Same pairing for the single-output callables.
modelOneOut = Model(model_function_one_out)
modelOneOutArrow = ArrowModel(model_function_one_out_arrow)

# LIME Benchmarking
Numeric vs Numeric/Text models, TrustyAI vs TrustyAI-Arrow

In [4]:
# Benchmark LIME on the two-output model in both output modes.
# `numeric` is the module-level flag read by model_function and
# model_function_arrow, so rebinding it in this loop switches both models
# between number/number and number/text outputs. It is left as False once the
# loop finishes; later cells that need numeric outputs set it back themselves.
for numeric in (True, False):
    print("=== Numeric:", numeric, "===")

    # Only the first of the inputs in exp_pi is explained.
    prediction_outputs = model.predictAsync(exp_pi).get()
    predictions = [simple_prediction(input_features=exp_pi[i].features, outputs=prediction_outputs[i].outputs) for i in range(1)]

    # time.perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump if the system clock is
    # adjusted. Explainer construction is included in each timing.
    t_start = time.perf_counter()
    lime_explainer = LimeExplainer(normalise_weights=True, perturbations=2, samples=nsamples)
    saliency_map = lime_explainer.explain(predictions[0], model)
    print("TrustyAI time:", time.perf_counter() - t_start)

    t_start = time.perf_counter()
    lime_explainer = LimeExplainer(normalise_weights=True, perturbations=2, samples=nsamples)
    saliency_map = lime_explainer.explainArrow(predictions[0], amodel)
    print("Arrow time:", time.perf_counter() - t_start)

=== Numeric: True ===
TrustyAI time: 2.6532397270202637




Arrow time: 1.175849199295044
=== Numeric: False ===
TrustyAI time: 2.0149922370910645
Arrow time: 1.2841930389404297


## TrustyAI vs TrustyAI-Arrow vs Official

In [5]:
# Benchmark LIME on the single-output model: TrustyAI, TrustyAI-Arrow and the
# official `lime` package. Only the first input in exp_pi / exp_arr is explained.
prediction_outputs = modelOneOut.predictAsync(exp_pi).get()
predictions = [simple_prediction(input_features=exp_pi[i].features, outputs=prediction_outputs[i].outputs) for i in range(1)]

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short durations. Explainer construction is included in each timing.
t_start = time.perf_counter()
lime_explainer = LimeExplainer(normalise_weights=False, perturbations=2, penalise_sparse_balance=False, samples=nsamples)
saliency_map = lime_explainer.explain(predictions[0], modelOneOut)
print("TrustyAI time:", time.perf_counter() - t_start)

t_start = time.perf_counter()
lime_explainer = LimeExplainer(normalise_weights=False, perturbations=2, penalise_sparse_balance=False, samples=nsamples)
saliency_map = lime_explainer.explainArrow(predictions[0], modelOneOutArrow)
print("Arrow time:", time.perf_counter() - t_start)

t_start = time.perf_counter()
# Feature names must be f0..f{fs-1}; the bare 'f{}' template (no .format call)
# previously gave every feature the same name.
lime_exp = lime_tabular.LimeTabularExplainer(bg_arr, mode="regression", feature_names=['f{}'.format(i) for i in range(fs)])
explanation = lime_exp.explain_instance(exp_arr[0], model_function_one_out_pure_numpy, num_samples=nsamples)
print("Official time:", time.perf_counter() - t_start)

TrustyAI time: 1.6536273956298828
Arrow time: 0.4255807399749756
Official time: 7.399468898773193


# SHAP
TrustyAI vs TrustyAI-Arrow vs Official

In [9]:
# Benchmark SHAP on the two-output numeric model: TrustyAI, TrustyAI-Arrow and
# (for small feature counts) the official `shap` package.
# Reset the module-level flag read by the two-output models; an earlier cell
# leaves it set to False.
numeric = True
prediction_outputs = model.predictAsync(exp_pi).get()
predictions = [simple_prediction(input_features=exp_pi[i].features, outputs=prediction_outputs[i].outputs) for i in range(1)]

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short durations. Each TrustyAI timing covers explainer
# construction, the explanation itself, and extraction of the scores into a
# NumPy array.
t_start = time.perf_counter()
shap_explainer = SHAPExplainer(background=bg_pi, samples=nsamples)
explanation = shap_explainer.explain(predictions[0], model)
shap_values = np.array([[feature_importance.getScore() for feature_importance in saliency.getPerFeatureImportance()] for saliency in explanation.getSaliencies()])
print("TrustyAI time:", time.perf_counter() - t_start)

t_start = time.perf_counter()
shap_explainer = SHAPExplainer(background=bg_pi, samples=nsamples)
explanation = shap_explainer.explainArrow(predictions[0], amodel)
shap_values = np.array([[feature_importance.getScore() for feature_importance in saliency.getPerFeatureImportance()] for saliency in explanation.getSaliencies()])
print("Arrow time:", time.perf_counter() - t_start)

# The official KernelExplainer is only run for fewer than 20 features
# (NOTE(review): presumably because it gets too slow beyond that -- confirm).
if fs<20:
    # Imported here so that `shap` is only required when this branch runs;
    # the import itself is excluded from the timing.
    import shap
    t_start = time.perf_counter()
    ske = shap.KernelExplainer(model_function_pure_numpy, bg_arr)
    ske.shap_values(exp_arr[:1], nsamples=nsamples)
    print("Official time:", time.perf_counter() - t_start)

TrustyAI time: 129.9765191078186
Arrow time: 22.521101474761963


  0%|          | 0/1 [00:00<?, ?it/s]

Official time: 2.232710599899292


# Counterfactual Benchmarking
TrustyAI vs TrustyAI-Arrow

In [10]:
# Benchmark counterfactual search on the two-output numeric model.
# Reset the module-level flag read by the two-output models so that both
# outputs ("dot" and "dot2") are numeric, matching the goal below.
numeric = True
explainer = CounterfactualExplainer(steps=nsamples)

# Goal: drive both outputs to 0.0.
goal = [output(name="dot", dtype="number", value=0.0, score=1.0), output(name="dot2", dtype="number", value=0.0, score=1.0)]
# One (-10.0, 10.0) domain tuple per feature.
domains = [(-10.0, 10.0)] * fs

prediction = counterfactual_prediction(
        input_features=exp_pi[0].getFeatures(),
        outputs=goal,
        domains=domains
)

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short durations. The same explainer instance is reused for both
# runs, so explainer construction is not part of either timing.
t_start = time.perf_counter()
counterfactual_result = explainer.explain(prediction, model)
print("TrustyAI Time:", time.perf_counter() - t_start)

t_start = time.perf_counter()
counterfactual_result = explainer.explainArrow(prediction, amodel)
print("Arrow Time:", time.perf_counter() - t_start)

TrustyAI Time: 3.049556255340576
Arrow Time: 12.246750593185425
