In [1]:
import sys
relative_root = "../.."
# Put `relative_root` on the import path so project packages such as `examples`
# can be imported. The membership check keeps sys.path free of duplicate entries
# when this cell is re-run.
if relative_root not in sys.path:
    sys.path.append(relative_root)

In [2]:
import joblib
import rich
import pandas as pd
from sklearn.metrics import accuracy_score
import json
from examples.training import titanic_config
from pathlib import Path

## Load data & model

In [3]:
# Load the artefacts produced by training: the preprocessed train/test CSVs,
# the serialised model and the feature-importance JSON.
artefact_root = Path(relative_root)
try:
    preprocessed_train = pd.read_csv(artefact_root / titanic_config.LOCAL_TRAIN_FILENAME)
    preprocessed_test = pd.read_csv(artefact_root / titanic_config.LOCAL_TEST_FILENAME)
    # NOTE(review): joblib.load unpickles the file, so only load model files you trust.
    rf_model = joblib.load(artefact_root / titanic_config.LOCAL_MODEL_FILENAME)
    with open(artefact_root / titanic_config.LOCAL_FI_FILENAME, "r") as file:
        feature_importance = json.load(file)
except FileNotFoundError as err:
    # Chain the original error explicitly so the traceback still names the missing file.
    raise FileNotFoundError("To generate these files, run `make train-titanic`") from err

In [4]:
# Smoke-test inference: predict on the test frame and display the first prediction.
test_predictions = rf_model.predict(preprocessed_test)
test_predictions[0]

1

## Init trubrics context

In [5]:
from trubrics.context import DataContext

In [6]:
# Bundle the train/test frames and the target column name into the DataContext
# that is later handed to the validators.
data_context = DataContext(
    target_column=titanic_config.TARGET,
    training_data=preprocessed_train,
    testing_data=preprocessed_test,
    name="my_first_dataset",
    version=0.1,
)

## Init trubrics validator

In [7]:
from trubrics.validations import ModelValidator

In [8]:
import sklearn.metrics

# `sklearn.metrics.SCORERS` was deprecated in scikit-learn 1.1 and removed in 1.3.
# Prefer the public `get_scorer_names()` and fall back to `SCORERS` on older releases.
if hasattr(sklearn.metrics, "get_scorer_names"):
    available_scorers = list(sklearn.metrics.get_scorer_names())
else:
    available_scorers = list(sklearn.metrics.SCORERS)
rich.print("Choose error metric from sklearn defaults: ", available_scorers)

In [9]:
# Build the validator for the chosen metric, then print the metric name
# together with the score on the test set.
model_validator = ModelValidator(
    model=rf_model,
    data=data_context,
    metric="accuracy",
)
test_score_label = f"Test set {model_validator.metric} score"
rich.print(test_score_label, model_validator.score_test)

2022-09-12 10:05:44.013 | DEBUG    | trubrics.context:score_test:193 - Scoring test set.


## Use the trubrics validator to create out-of-the-box validations

In [10]:
# Single hand-written passenger record passed to the edge-case validations.
edge_case = dict(
    Sex="male",
    Embarked="S",
    Title="Master",
    Pclass=2,
    Age=28,
    SibSp=0,
    Parch=0,
    Fare=37,
)

In [11]:
# Two edge-case validations on the same record with opposite desired outputs.
robustness = [
    # example of fail
    model_validator.validate_single_edge_case(edge_case_data=edge_case, desired_output=0),
    # example of pass
    model_validator.validate_single_edge_case(edge_case_data=edge_case, desired_output=1),
]
robustness[0].severity = "warning"  # change the severity of a validation
robustness[1].severity = "experiment"

# A plain loop does the printing: binding a throwaway list to `_` would shadow
# IPython's last-output variable for the rest of the session.
for validation in robustness:
    rich.print(validation.dict())

In [12]:
# Performance validations: two fixed thresholds and two dummy-model comparisons.
performance = [
    model_validator.validate_performance_against_threshold(threshold=0.8),
    model_validator.validate_performance_against_threshold(threshold=0.75),
    model_validator.validate_performance_against_dummy(),
    model_validator.validate_performance_against_dummy(strategy="stratified"),
]

# A plain loop does the printing: binding a throwaway list to `_` would shadow
# IPython's last-output variable for the rest of the session.
for validation in performance:
    rich.print(validation.dict())

In [13]:
# Per-category performance validations on the "Embarked" and "Sex" columns.
fairness = [
    model_validator.validate_biased_performance_across_category(category="Embarked", threshold=0.1),
    model_validator.validate_biased_performance_across_category(category="Sex", threshold=0.05),
]

# A plain loop does the printing: binding a throwaway list to `_` would shadow
# IPython's last-output variable for the rest of the session.
for validation in fairness:
    rich.print(validation.dict())

In [14]:
# Feature-importance validations using the `feature_importance` mapping loaded earlier.
explainability = [
    model_validator.validate_feature_in_top_n_important_features(
        feature="Sex_female",
        feature_importance=feature_importance,
        top_n_features=3,
    ),
    model_validator.validate_feature_in_top_n_important_features(
        feature="Age",
        feature_importance=feature_importance,
        top_n_features=2,
    ),
]

# A plain loop does the printing: binding a throwaway list to `_` would shadow
# IPython's last-output variable for the rest of the session.
for validation in explainability:
    rich.print(validation.dict())

## Collect user feedback from app

In [15]:
from trubrics.utils.loader import get_business_feedback_data
# Read back the feedback generated from the streamlit app and display it.
try:
    data = get_business_feedback_data(tracking=False)
except FileNotFoundError:
    print("Please generate feedback from the streamlit app in order to read it back here")
else:
    # Displayed outside the `try` so the handler only covers the read itself.
    display(data)

{'feedback_type': 'Other',
 'metadata': {'description': 'model seems to have more errors for passengers who paid less - is this normal?',
  'what_if_input': {'Sex': {'0': 'male'},
   'Embarked': {'0': 'S'},
   'Title': {'0': 'Master'},
   'Pclass': {'0': 2},
   'Age': {'0': 28},
   'SibSp': {'0': 0},
   'Parch': {'0': 0},
   'Fare': {'0': 37}}}}

--> **DS response: "It isn't normal: the model should not be noticeably more accurate for some groups of passengers than for others. I'll add a test for this."**

## Create custom validations

In [16]:
from trubrics.validations.validation_output import validation_output, validation_output_type


class CustomValidator(ModelValidator):
    """ModelValidator subclass that adds a fare-based custom validation."""

    def __init__(self, data: DataContext, model, metric: str):
        # Keyword arguments keep this call independent of the parent's parameter order.
        super().__init__(data=data, model=model, metric=metric)

    @validation_output
    def validate_performance_for_different_fares(self, fare_cutoff: int = 50):
        """
        Public entry point for the fare validation, wrapped by @validation_output.

        Parameters
        ----------
        fare_cutoff
            Fare value splitting the test-set errors into two groups
            (``Fare <= fare_cutoff`` and ``Fare > fare_cutoff``). The default matches
            the default of _validate_performance_for_different_fares.
        """
        return self._validate_performance_for_different_fares(fare_cutoff)

    def _validate_performance_for_different_fares(self, fare_cutoff: int = 50) -> validation_output_type:
        """
        Compare the number of test-set errors at or below a fare cutoff with the number above it.

        Notes
        -----
            This method is separated from validate_performance_for_different_fares
            to apply @validation_output and for unit testing.

            The @validation_output decorator allows you to generate a Validation object,
            and must be used to be able to save your validation as part of a Trubric.
            This decorator requires you to return values with the same type as validation_output_type.

        Returns
        -------
            A tuple of (outcome, result). The outcome is True when the ratio
            errors(Fare <= fare_cutoff) / errors(Fare > fare_cutoff) lies strictly
            between 0.5 and 1.5, and result holds that ratio rounded to 3 decimals.

            When there are no errors above the cutoff the ratio is undefined. The
            outcome is then True only if there are no errors at or below the cutoff
            either, and result reports the two error counts instead of a ratio.
        """
        errors_df = self.trubrics_model.testing_data_errors
        fares = errors_df["Fare"]
        errors_at_or_below_cutoff = int((fares <= fare_cutoff).sum())
        errors_above_cutoff = int((fares > fare_cutoff).sum())

        if errors_above_cutoff == 0:
            # Avoid ZeroDivisionError: no errors in the denominator group.
            # No errors in either group means no imbalance to report; errors only
            # at or below the cutoff is treated as a failure.
            return (
                errors_at_or_below_cutoff == 0,
                {
                    "errors_at_or_below_cutoff": errors_at_or_below_cutoff,
                    "errors_above_cutoff": errors_above_cutoff,
                },
            )

        number_of_errors_by_fare_ratio = errors_at_or_below_cutoff / errors_above_cutoff
        return (
            0.5 < number_of_errors_by_fare_ratio < 1.5,
            {"number_of_errors_by_fare_ratio": round(number_of_errors_by_fare_ratio, 3)},
        )


In [17]:
# Instantiate the custom validator with the same data context, model and metric name.
model_custom_validator = CustomValidator(
    metric="accuracy",
    model=rf_model,
    data=data_context,
)

2022-09-12 10:05:44.924 | DEBUG    | trubrics.context:score_test:193 - Scoring test set.


In [18]:
# Run the custom fare validation with a cutoff of 25.
custom = [model_custom_validator.validate_performance_for_different_fares(fare_cutoff=25)]
custom[0].severity = "warning"  # change the severity of a validation

# A plain loop does the printing: binding a throwaway list to `_` would shadow
# IPython's last-output variable for the rest of the session.
for validation in custom:
    rich.print(validation.dict())

2022-09-12 10:05:44.995 | DEBUG    | trubrics.context:predictions_test:180 - Predicting test set.


## Save trubric

In [19]:
from trubrics.context import TrubricContext

# Flatten the validation lists built above into one list, keeping their order.
validations = [*robustness, *performance, *fairness, *explainability, *custom]

# Describe the trubric: its validations plus the metric, model and data context they refer to.
trubric_context = TrubricContext(
    validations=validations,
    trubric_name="my_first_trubric",
    model_name="my_model",
    metric=model_validator.metric,
    data_context_name=data_context.name,
    data_context_version=data_context.version,
    metadata={"tag": "master"},
)

In [20]:
# Write the trubric as a local .json file under the given directory.
local_trubric_dir = "../data"
trubric_context.save_local(path=local_trubric_dir)

2022-09-12 10:05:45.134 | INFO     | trubrics.context:save_local:316 - Trubric saved to ../data/my_first_trubric.json.


In [21]:
# Alternatively, push the trubric to the trubrics UI.
url = None  # enter api url for trubrics manager here
user_id = None  # enter User ID from trubrics manager here

# Any failure is printed rather than raised, so the following cells can still run.
try:
    trubric_context.save_ui(url=url, user_id=user_id)
except Exception as e:
    print(f"Error in saving to trubrics manager:\n{e}")
else:
    print("Trubric saved to UI.")

Error in saving to trubrics manager:
You must specify a 'user_id' to push to the trubrics manager.


## Execute trubric from file

In [22]:
from trubrics.validations.run import run_trubric

In [23]:
# Parse the trubric .json from disk and run it against the current data context and model.
trubric_path = "../data/my_first_trubric.json"
trubric = TrubricContext.parse_file(trubric_path)

all_validation_results = run_trubric(
    trubric=trubric,
    model=rf_model,
    data_context=data_context,
    custom_validator=CustomValidator,
)

In [24]:
# Print one summary line per validation result: type, upper-cased severity, outcome.
for validation_result in all_validation_results:
    severity_label = validation_result.severity.upper()
    summary_line = f"{validation_result.validation_type} - {severity_label}.......{validation_result.outcome}"
    rich.print(summary_line)

2022-09-12 10:05:45.458 | DEBUG    | trubrics.context:score_test:193 - Scoring test set.


2022-09-12 10:05:45.871 | DEBUG    | trubrics.context:predictions_test:180 - Predicting test set.


In [25]:
# or run from cli
import os
os.chdir("../..")
!make example-run-trubric 
os.chdir("examples/notebooks")

[34mRunning trubric from file 'examples/cli/trubric_run.py' with model 'my_model' and dataset 'my_dataset'.[0m
[1mvalidate_single_edge_case - EXPERIMENT[0m..............................................................[32m[1mPASSED[0m
[1mvalidate_performance_against_threshold - ERROR[0m......................................................[37m[41mFAILED[0m
[1mvalidate_performance_against_threshold - ERROR[0m......................................................[32m[1mPASSED[0m
[1mvalidate_performance_against_dummy - ERROR[0m..........................................................[32m[1mPASSED[0m
[1mvalidate_performance_against_dummy - ERROR[0m..........................................................[32m[1mPASSED[0m
[1mvalidate_biased_performance_across_category - ERROR[0m.................................................[37m[41mFAILED[0m
[1mvalidate_biased_performance_across_category - ERROR[0m.................................................[32m[1m