### Import libraries

In [None]:
import sklearn
import pandas as pd
from lightgbm import LGBMClassifier

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

### Define helpers that split off the label and build the data-cleaning pipeline

In [None]:
def split_label(dataset, target_feature):
    """Separate a dataset into its feature columns and its label column.

    Args:
        dataset: pandas DataFrame holding both features and the label.
        target_feature: name of the label column in ``dataset``.

    Returns:
        A ``(X, y)`` tuple where ``X`` is ``dataset`` without the label
        column and ``y`` is a one-column DataFrame containing only the label.
        ``dataset`` itself is left unmodified.
    """
    # Double brackets keep the label as a DataFrame rather than a Series.
    y = dataset[[target_feature]]
    X = dataset.drop(columns=[target_feature])
    return X, y

def _dense_one_hot_encoder():
    """Build a dense-output OneHotEncoder that ignores unseen categories.

    scikit-learn 1.2 renamed the ``sparse`` argument to ``sparse_output`` and
    1.4 removed the old name, so try the new spelling first and fall back to
    the old one on releases that do not know it yet.
    """
    try:
        return OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        return OneHotEncoder(handle_unknown='ignore', sparse=False)


def clean_data(X, y, target_feature):
    """Fit a preprocessing transformer on ``X`` and return the transformed data.

    Numeric columns are median-imputed and standardized; ``object`` columns
    are imputed with the constant ``'?'`` and one-hot encoded. Columns of any
    other dtype are not passed to either sub-pipeline.

    Args:
        X: pandas DataFrame of raw feature columns.
        y: pandas DataFrame containing the ``target_feature`` column.
        target_feature: name of the label column inside ``y``.

    Returns:
        A tuple ``(X_transformed, feat_pipe, features, classes)``:
        the output of ``feat_pipe.fit_transform(X)``, the fitted
        ``ColumnTransformer``, the list of original column names of ``X``,
        and the list of distinct label values found in ``y``.
    """
    features = X.columns.values.tolist()
    classes = y[target_feature].unique().tolist()
    pipe_cfg = {
        # Select every numeric dtype, not only int64: a numeric column that
        # contains missing values is float64, and those are exactly the
        # columns the median imputer below is meant to handle.
        'num_cols': X.select_dtypes(include=['number']).columns.tolist(),
        'cat_cols': X.dtypes[X.dtypes == 'object'].index.values.tolist(),
    }
    num_pipe = Pipeline([
        ('num_imputer', SimpleImputer(strategy='median')),
        ('num_scaler', StandardScaler())
    ])
    cat_pipe = Pipeline([
        ('cat_imputer', SimpleImputer(strategy='constant', fill_value='?')),
        ('cat_encoder', _dense_one_hot_encoder())
    ])
    feat_pipe = ColumnTransformer([
        ('num_pipe', num_pipe, pipe_cfg['num_cols']),
        ('cat_pipe', cat_pipe, pipe_cfg['cat_cols'])
    ])
    X = feat_pipe.fit_transform(X)
    # Show which columns were treated as categorical.
    print(pipe_cfg['cat_cols'])
    return X, feat_pipe, features, classes


### Load dataset and classify different types of features

In [None]:
# Read the pre-split training and test tables from the working directory.
train_data = pd.read_csv('adult-train.csv')
test_data = pd.read_csv('adult-test.csv')

# Name of the label column and the columns to be treated as categorical.
target_feature = 'income'
categorical_features = ['workclass', 'education', 'marital-status',
                        'occupation', 'relationship', 'race', 'gender', 'native-country']

X_train_original, y_train = split_label(train_data, target_feature)
X_test_original, y_test = split_label(test_data, target_feature)


# Fit the preprocessing transformer on the training features only, then
# reuse the fitted transformer for the test features.
X_train, feat_pipe, features, classes = clean_data(X_train_original, y_train, target_feature)
y_train = y_train[target_feature].to_numpy()

X_test = feat_pipe.transform(X_test_original)
y_test = y_test[target_feature].to_numpy()

# Write the label arrays back into the raw frames. No label transformation
# happens above, so the column values are unchanged by this assignment.
train_data[target_feature] = y_train
test_data[target_feature] = y_test

# Take up to 500 samples from the test data and up to 8000 samples from train data.
# Capping at the row count avoids the ValueError that DataFrame.sample raises
# when asked for more rows than the frame holds (sampling is without replacement).
test_data_sample = test_data.sample(n=min(500, len(test_data)), random_state=5)
train_data_sample = train_data.sample(n=min(8000, len(train_data)), random_state=5)

### Train the classification model on the training data

In [None]:
# Fit a LightGBM classifier with 5 estimators on the preprocessed training
# matrix; `model` holds whatever `fit` returns and is used by later cells.
clf = LGBMClassifier(n_estimators=5)
model = clf.fit(X=X_train, y=y_train)

### Create model and data insights

In [None]:
#Import RAI libraries

from raiwidgets import ResponsibleAIDashboard
from responsibleai import RAIInsights

In [None]:
# Chain the already-fitted preprocessing transformer and the trained model
# into one pipeline, so it can be applied directly to the raw data frames.
dashboard_pipeline = Pipeline(
    steps=[
        ('preprocess', feat_pipe),
        ('model', model),
    ]
)

# Initialize the RAIInsights object upon which the individual components are loaded.
# NOTE(review): the full `train_data` is passed here, while `train_data_sample`
# computed earlier is never used — confirm which one is intended.
rai_insights = RAIInsights(
    model=dashboard_pipeline,
    train=train_data,
    test=test_data_sample,
    target_column=target_feature,
    task_type='classification',
    categorical_features=categorical_features,
)

### Add the components of RAI toolbox for model assessment

In [None]:
# Register the analyses to run; nothing is computed until
# `rai_insights.compute()` is called in a later cell.

# Interpretability: model explanations
rai_insights.explainer.add()
# Error Analysis
rai_insights.error_analysis.add()
# Counterfactuals: request 10 counterfactual examples with the opposite
# class as the desired outcome
rai_insights.counterfactual.add(total_CFs=10, desired_class='opposite')

### Compute insights on the test set

In [None]:
# Run every component that was added to `rai_insights` above.
rai_insights.compute()

### Visualize and explore the model insights at the link shown in the cell output

In [None]:
# Build the dashboard from the computed insights.
# NOTE(review): presumably this starts a local server and prints its URL — confirm.
ResponsibleAIDashboard(rai_insights)