In [None]:
import pickle

import lightgbm as lgb
import lime
import lime.lime_tabular  # `import lime` alone does not load this submodule
import numpy as np
from sklearn.metrics import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from Step0_cfreasoning import *

Here we generate a dataset to train an agent that evaluates **LIME** explanations. The same procedure applies to any local feature-attribution method: replace the function below with another explanation method and evaluate it in the same manner.

In [None]:
def get_lime_exp(X_samples, wrapped_fn, num_samples=1000, random_state=None):
    """Compute a LIME weight for every feature of every row in ``X_samples``.

    One ``LimeTabularExplainer`` (regression mode, continuous features not
    discretized) is built using ``X_samples`` itself as the explainer's
    training data, and every row of ``X_samples`` is then explained with it.

    Parameters
    ----------
    X_samples : array-like, two-dimensional (n_points, n_features)
        Points to explain. Converted with ``np.asarray``.
    wrapped_fn : callable
        Prediction function passed unchanged to ``explain_instance``.
    num_samples : int, default 1000
        Number of perturbed samples LIME draws for each explained row.
    random_state : optional, default None
        Forwarded to ``LimeTabularExplainer``; pass a fixed value to make the
        explanations reproducible. ``None`` keeps LIME's default behaviour.

    Returns
    -------
    numpy.ndarray
        One row per explained point; column ``j`` holds the LIME weight of
        feature ``j``.

    Raises
    ------
    ValueError
        If ``X_samples`` is not two-dimensional.
    """
    X_samples = np.asarray(X_samples)
    if X_samples.ndim != 2:
        raise ValueError(
            "X_samples must be 2-D (n_points, n_features), got shape {}".format(X_samples.shape)
        )
    # Explain every feature instead of assuming exactly two columns.
    n_features = X_samples.shape[1]

    explainer = lime.lime_tabular.LimeTabularExplainer(
        X_samples,
        discretize_continuous=False,
        mode='regression',
        random_state=random_state,
    )
    lime_exp = []

    for point in X_samples:
        exp_obj = explainer.explain_instance(
            point, wrapped_fn, num_features=n_features, num_samples=num_samples
        ).as_list()
        exp_dict = dict(exp_obj)
        # No feature_names are passed to the explainer, so feature j is
        # reported under the name str(j). Look weights up by name because the
        # order of as_list() is not the column order.
        exp_arr = [exp_dict[str(j)] for j in range(n_features)]
        lime_exp.append(exp_arr)

    lime_exp = np.array(lime_exp)
    return lime_exp

## Generate dataset for this explainer

This function generates a dataset where each data-point has an associated LIME explanation and ground truth label.

In [None]:
def generate_dataset(N_total_observations, N_points_each_fn):
    """Build a list-based dataset of explained points.

    Each of the ``N_total_observations`` iterations:

    1. creates a fresh ``SaddlePoint2D`` object and draws
       ``N_points_each_fn`` samples from it via ``generate_random_data``;
    2. fits a new base model (``get_base_model()``) on the 80% training part
       of an 80/20 split of those samples;
    3. calls ``sample_new_point()`` on the same object and explains the
       result with ``get_lime_exp`` using the fitted model's predictions.

    The loop index is printed every 100 iterations as a progress indicator.

    NOTE(review): ``SaddlePoint2D``, ``get_base_model`` and ``seed`` are not
    defined in this notebook section; presumably they are provided by
    ``from Step0_cfreasoning import *`` -- confirm.

    Returns
    -------
    tuple of three lists ``(Xs, us, limes)``
        Per iteration: the ``X`` and ``u`` values returned by
        ``sample_new_point()`` and the matching LIME explanation array.
    """
    explained_X, explained_u, explanations = [], [], []

    for obs_idx in range(N_total_observations):
        if obs_idx % 100 == 0:
            print(obs_idx)

        source_fn = SaddlePoint2D()
        X_all, y_all, u_all = source_fn.generate_random_data(N = N_points_each_fn)

        # Only the training features/targets are used below; the held-out
        # parts and both halves of `u` are discarded.
        X_fit, _X_held, y_fit, _y_held, _u_fit, _u_held = train_test_split(
            X_all, y_all, u_all, test_size=0.20, random_state=seed
        )
        regressor = get_base_model()
        regressor.fit(X_fit, y_fit)

        def predict_column(x):
            # Return the model's predictions as a single column.
            return regressor.predict(x).reshape(-1, 1)

        X_new, _y_new, u_new = source_fn.sample_new_point()
        new_explanation = get_lime_exp(X_new, wrapped_fn=predict_column)

        explained_X.append(X_new)
        explained_u.append(u_new)
        explanations.append(new_explanation)

    return explained_X, explained_u, explanations

## Write out dataset

In [None]:
def save_dict(X, u, lime_exp,
        filename: str,
        shap_exp=None, gam_exp=None, pred=None):
    """Pickle the dataset to ``filename`` as a single dictionary.

    The dictionary always contains the keys ``'x'``, ``'u'`` and ``'lime'``.
    The keys ``'shap'``, ``'gam'`` and ``'pred'`` are added only when the
    corresponding optional argument is supplied (not ``None``).

    Parameters
    ----------
    X, u, lime_exp :
        Objects stored under ``'x'``, ``'u'`` and ``'lime'`` respectively.
    filename : str
        Path of the pickle file to write; an existing file is overwritten.
    shap_exp, gam_exp, pred : optional
        Extra objects stored under ``'shap'``, ``'gam'`` and ``'pred'``.
    """
    data_df = {'x': X, 'u': u, 'lime': lime_exp}

    # These values used to be read from undefined names, which raised
    # NameError on every call. They are now optional and stored only if given.
    optional_entries = {'shap': shap_exp, 'gam': gam_exp, 'pred': pred}
    for key, value in optional_entries.items():
        if value is not None:
            data_df[key] = value

    # The context manager guarantees the file is flushed and closed.
    with open(filename, "wb") as out_file:
        pickle.dump(data_df, out_file)

In [None]:
# Build 6000 explained observations, each from a model trained on 5000 freshly
# generated points, then pickle the result to "lime_dataset".
Xs, us, limes = generate_dataset(N_total_observations=6000, N_points_each_fn=5000)
save_dict(Xs, us, limes, filename="lime_dataset")