## Train model

In [None]:
import shap
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
import mlflow
from mlflow.models import MetricThreshold

# Load the UCI Adult data set bundled with shap and hold out 33% of the rows
# as a test set (fixed random_state so the split is reproducible).
X, y = shap.datasets.adult()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# Train the candidate model: a scikit-learn gradient boosting classifier with
# default hyperparameters.
candidate_model = GradientBoostingClassifier().fit(X_train, y_train)

# Train a baseline dummy model to compare the candidate against.
baseline_model = DummyClassifier(strategy="uniform").fit(X_train, y_train)

# Construct the evaluation dataset from the test set. Work on a copy: binding
# eval_data to X_test directly and then adding the "label" column would also
# add that column to X_test, leaving the target inside the test features.
eval_data = X_test.copy()
eval_data["label"] = y_test

## Build validations with Trubrics

In [None]:
from trubrics.context import DataContext

# Wrap the evaluation data in a DataContext; the "label" column is the target.
data_context = DataContext(testing_data=eval_data, target="label")

In [None]:
from trubrics.validations import ModelValidator

# Validator bound to the candidate model and the data context built above.
model_validator = ModelValidator(
    model=candidate_model,
    data=data_context,
)

In [None]:
# Two threshold checks on the candidate model: accuracy against 0.8, then
# recall against 0.61 with severity set to "warning".
accuracy_validation = model_validator.validate_performance_against_threshold(
    metric="accuracy", threshold=0.8
)
recall_validation = model_validator.validate_performance_against_threshold(
    metric="recall", threshold=0.61, severity="warning"
)
validations = [accuracy_validation, recall_validation]

In [None]:
import rich

# Pretty-print the first two validation results.
first_validation, second_validation = validations[0], validations[1]
rich.print(first_validation, second_validation)

In [None]:
from trubrics.validations import Trubric

In [None]:
# Bundle the validations into a Trubric named "mlflow-demo", recording the
# name and version of the data context they were built against.
context_name = data_context.name
context_version = data_context.version
trubric = Trubric(
    name="mlflow-demo",
    validations=validations,
    data_context_name=context_name,
    data_context_version=context_version,
)

In [None]:
# Display whatever set_dynamic_fields() returns for this trubric.
dynamic_fields = trubric.set_dynamic_fields()
rich.print(dynamic_fields)

In [None]:
# Persist the trubric as a .json file in the current working directory.
trubric_path = "./my_first_trubric.json"
trubric.save_local(path=trubric_path)

## Execute the trubric from file

The `TrubricRun` object lets you load a Trubric from a file and run it directly against any model and `DataContext`.

In [None]:
from trubrics.validations.run import TrubricRun

In [None]:
# Read the trubric back from the JSON file written earlier in the notebook.
trubric_from_file = Trubric.parse_file("./my_first_trubric.json")

# Pair the loaded trubric with the candidate model and data context so it can
# be re-run; failing_severity is set to "warning".
trubric_run_context = TrubricRun(
    trubric=trubric_from_file,
    model=candidate_model,
    data_context=data_context,
    failing_severity="warning",
    tags=["nb-demo-new"],
)

## Save to mlflow

In [None]:
# Criteria the candidate model is validated against during mlflow.evaluate.
accuracy_threshold = MetricThreshold(
    threshold=0.8,  # accuracy should be >=0.8
    min_absolute_change=0.05,  # at least 0.05 above the baseline model's accuracy
    min_relative_change=0.05,  # at least 5 percent above the baseline model's accuracy
    higher_is_better=True,
)
thresholds = {"accuracy_score": accuracy_threshold}

with mlflow.start_run() as run:
    # Log both models to the active run and keep their URIs for evaluation.
    candidate_info = mlflow.sklearn.log_model(candidate_model, "candidate_model")
    candidate_model_uri = candidate_info.model_uri
    baseline_info = mlflow.sklearn.log_model(baseline_model, "baseline_model")
    baseline_model_uri = baseline_info.model_uri

    # Produce a new trubric from the run context and store it as a run artifact.
    new_trubric = trubric_run_context.set_new_trubric()
    trubric_payload = new_trubric.dict()
    mlflow.log_dict(trubric_payload, artifact_file="demo-trubric.json")
    # NOTE(review): presumably raises when a validation at or above the
    # configured failing_severity did not pass, which would skip the
    # evaluation below — confirm against the Trubrics documentation.
    new_trubric.raise_trubric_failure()

    # Evaluate the candidate against the thresholds, relative to the baseline.
    mlflow.evaluate(
        candidate_model_uri,
        eval_data,
        targets="label",
        model_type="classifier",
        validation_thresholds=thresholds,
        baseline_model=baseline_model_uri,
    )

In [None]:
# Pretty-print the trubric returned by trubric_run_context.set_new_trubric().
rich.print(new_trubric)