In [None]:
import shap

from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier

import mlflow

## Train model

In [None]:
# Load the UCI Adult data set and split it into training and test sets.
X, y = shap.datasets.adult()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# Train a candidate gradient-boosting model (scikit-learn's
# GradientBoostingClassifier, not XGBoost). Seeded like the split above so
# that a fresh Restart & Run All reproduces the same model.
model = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

# Construct the evaluation dataset from the test set as a NEW frame.
# Writing the "label" column through an alias of X_test would add the target
# to the test features themselves, so X_test is left untouched here.
eval_data = X_test.assign(label=y_test)

## Save to mlflow with Trubrics validation

In [None]:
with mlflow.start_run() as run:
    # Log the fitted model under the "sklearn_model" artifact path and keep
    # its URI, which is what mlflow.evaluate receives as the model reference.
    logged_model = mlflow.sklearn.log_model(model, "sklearn_model")
    model_uri = logged_model.model_uri

    # Settings handed to the "trubrics" evaluator: the path of the saved
    # trubric file and the in-memory model object.
    trubrics_config = {
        "trubric_path": "./my_first_trubric.json",
        "model": model,
    }

    # Evaluate the logged classifier on eval_data, using the "label" column
    # as the target.
    mlflow.evaluate(
        model=model_uri,
        model_type="classifier",
        data=eval_data,
        targets="label",
        evaluators="trubrics",
        evaluator_config=trubrics_config,
    )

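## Build validations with Trubrics

Set `BUILD = True` and run the cells below to (re)generate `./my_first_trubric.json`, the trubric file that the evaluation step above reads. Run this once before the MLflow evaluation if the file does not exist yet.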

In [None]:
# Toggle for the cell below: when True, that cell builds the Trubrics
# validations and saves them to ./my_first_trubric.json (the same path the
# mlflow.evaluate call passes as "trubric_path"). When False, it is skipped.
BUILD = False

In [None]:
if BUILD:
    # Imports are kept inside the guard so the rest of the notebook does not
    # require them when BUILD is False. `rich` is imported here because
    # rich.print is used below and was previously never brought into scope.
    import rich
    from trubrics.context import DataContext
    from trubrics.validations import ModelValidator, Trubric

    # Describe the evaluation data: the frame to test on and its target column.
    data_context = DataContext(
        target="label", testing_data=eval_data
    )
    model_validator = ModelValidator(data=data_context, model=model)

    # Two performance checks against fixed thresholds; the recall check is
    # registered with severity "warning".
    validations = [
        model_validator.validate_performance_against_threshold(metric="accuracy", threshold=0.8),
        model_validator.validate_performance_against_threshold(metric="recall", threshold=0.61, severity="warning"),
    ]

    # Pretty-print every validation result.
    rich.print(*validations)

    # Bundle the validations into a trubric tied to this data context.
    trubric = Trubric(
        name="mlflow-demo",
        data_context_name=data_context.name,
        data_context_version=data_context.version,
        validations=validations
    )

    # Persist to the path that the mlflow.evaluate call passes as "trubric_path".
    trubric.save_local(path="./my_first_trubric.json")