In [None]:
import shap
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import mlflow

In [None]:
# Show the evaluators that MLflow currently has installed.
# The displayed list should be ['default', 'trubrics'].
mlflow.models.evaluation.base.list_evaluators()

## Train model

In [None]:
# load UCI Adult dataset
X, y = shap.datasets.adult()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# train a boosting classifier
model = GradientBoostingClassifier().fit(X_train, y_train)

# set an evaluation dataset: the test features plus a "label" target column.
# `assign` returns a new DataFrame, so X_test keeps only feature columns
# (assigning into an alias of X_test would add the target to the features
# and can trigger pandas' SettingWithCopyWarning).
eval_data = X_test.assign(label=y_test)

## Save to mlflow with Trubrics validation

### Getting started with Trubrics evaluator

To get started with the Trubrics evaluator plugin, you need two parameters in the `evaluator_config`:
- **trubric_path**: a Trubric file of validations (see below on how to build validations with Trubrics and save this file)
- **model**: your ML model (see [compatible models](https://trubrics.github.io/trubrics-sdk/models/) with Trubrics)

In [None]:
with mlflow.start_run() as run:
    # Log the fitted classifier so the evaluation can reference it by URI.
    logged_model = mlflow.sklearn.log_model(model, "sklearn_model")
    model_uri = logged_model.model_uri

    # Minimal settings for the Trubrics evaluator: the Trubric file and the model.
    basic_config = {
        "trubric_path": "./my_first_trubric.json",
        "model": model,
    }

    mlflow.evaluate(
        model=model_uri,
        model_type="classifier",
        data=eval_data,
        targets="label",
        evaluators="trubrics",
        evaluator_config=basic_config,
    )

**To see your mlflow run, open a terminal and execute:**
```
(venv)$ mlflow ui
```

### Advanced usage of Trubrics evaluator

For a more advanced example of how you can customise the Trubrics evaluator, we add more parameters into the `evaluator_config`:
- **data_context**: specifying a `data_context` allows you to build validations not just on your evaluation data, but also on your training data. This can be useful for validating whether your model is overfitting, or whether your training and testing data follow similar distributions, for example. See our [DataContext docs](https://trubrics.github.io/trubrics-sdk/data_context/) for more info.
- **failing_severity**: this allows you to change the severity that you would like the Trubric to fail on. Setting `failing_severity="warning"` means that any validation failure with a severity of "warning" or "error" will result in an overall failure of the Trubric and will raise an exception. The default behaviour is `failing_severity="error"`, meaning only validation failures with `severity="error"` will raise an exception.
- **tags**: any tags that you want to save to your Trubric (separate from MLflow tags).
- **slicing_functions**: slicing functions in Trubrics allow you to validate your model performance on different data slices. See more [here](https://trubrics.github.io/trubrics-sdk/metrics/#3-data-slicing-functions).

To unlock the full power of Trubrics, you should look at building custom validations for your model. Read more about this [here](https://trubrics.github.io/trubrics-sdk/custom_validations/).

In [None]:
import pandas as pd

def age_young(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of ``df`` whose "Age" value is strictly below 25.

    All columns are kept; only rows are filtered.
    """
    is_young = df["Age"] < 25
    return df[is_young]


# Slice name -> slicing function, passed to the Trubrics evaluator config.
slicing_functions = {"young_adults": age_young}

In [None]:
from trubrics.context import DataContext

In [None]:
with mlflow.start_run() as run:
    # Log the fitted classifier so the evaluation can reference it by URI.
    logged_model = mlflow.sklearn.log_model(model, "sklearn_model")
    model_uri = logged_model.model_uri

    # Named data context wrapping the evaluation frame and its target column.
    demo_context = DataContext(name="mlflow-demo-data", target="label", testing_data=eval_data)

    advanced_config = {
        "trubric_path": "./my_second_trubric.json",
        "model": model,
        "data_context": demo_context,
        # Fail the Trubric on "warning" as well as "error" severities.
        "failing_severity": "warning",
        "tags": ["mlflow-demo-tag"],
        "slicing_functions": slicing_functions,
    }

    mlflow.evaluate(
        model=model_uri,
        model_type="classifier",
        data=eval_data,
        targets="label",
        evaluators="trubrics",
        evaluator_config=advanced_config,
    )

## Build validations with Trubrics

Here we'll see how we can build some simple validations for our model. For more info on building validations, you can view our full tutorial [here](https://colab.research.google.com/github/trubrics/trubrics-sdk/blob/main/examples/classification_titanic/classification_full_demo.ipynb).

In [None]:
import rich

from trubrics.context import DataContext
from trubrics.validations import ModelValidator, Trubric
# Wrap the evaluation frame (with its "label" target) for the validator.
data_context = DataContext(target="label", testing_data=eval_data)
model_validator = ModelValidator(data=data_context, model=model)

# One entry per performance check; each dict holds the keyword arguments
# for `validate_performance_against_threshold`.
threshold_checks = [
    {"metric": "accuracy", "threshold": 0.8},
    {"metric": "recall", "threshold": 0.61, "severity": "warning"},
]
validations = [
    model_validator.validate_performance_against_threshold(**check)
    for check in threshold_checks
]

# Optional: inspect the individual validation results.
# rich.print(validations[0], validations[1])

trubric = Trubric(
    name="mlflow-demo",
    data_context_name=data_context.name,
    data_context_version=data_context.version,
    validations=validations,
)

# Optional: uncomment to write the Trubric to the path read by the first
# evaluation run ("./my_first_trubric.json").
# trubric.save_local(path="./my_first_trubric.json")
rich.print(trubric)