In [1]:
import sys
# Path from this notebook's directory to the repository root; it is put on
# sys.path so that the `examples` package can be imported below.
relative_root = "../.."
# Guarded so that re-running this cell does not keep appending duplicates.
if relative_root not in sys.path:
    sys.path.append(relative_root)

In [2]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score
import json
from examples.training import titanic_config
from pathlib import Path

## Load data & model

In [3]:
# Load the preprocessed train/test frames, the fitted model and the feature
# importances from the file names declared in `titanic_config`, resolved
# relative to the repository root.
try:
    preprocessed_train = pd.read_csv(Path(relative_root) / titanic_config.LOCAL_TRAIN_FILENAME)
    preprocessed_test = pd.read_csv(Path(relative_root) / titanic_config.LOCAL_TEST_FILENAME)
    # NOTE(review): joblib.load unpickles the file, so only load model files you trust.
    rf_model = joblib.load(Path(relative_root) / titanic_config.LOCAL_MODEL_FILENAME)
    with open(Path(relative_root) / titanic_config.LOCAL_FI_FILENAME, "r") as file:
        feature_importance = json.load(file)
except FileNotFoundError as error:
    # Chain explicitly so the traceback still names the file that was missing.
    raise FileNotFoundError("To generate these files, run `make train-titanic`") from error

In [4]:
# Smoke test: run the loaded model on the test frame and show the first prediction.
rf_model.predict(preprocessed_test)[0]  # test model inference

1

## Init trubrics context

In [5]:
from trubrics.context import DataContext, ModelContext

In [6]:
# Data side: the train / test frames loaded above plus the name of the target column.
data_context = DataContext(
    name="my_first_dataset",
    version=0.1,
    training_data=preprocessed_train,
    testing_data=preprocessed_test,
    target_column=titanic_config.TARGET,
)

# Model side: the fitted estimator and the function used to score it.
model_context = ModelContext(
    name="my_first_model",
    version=0.1,
    estimator=rf_model,
    evaluation_function=accuracy_score,
)

## Init trubrics validator

In [7]:
from trubrics.validators.base import Validator

In [8]:
# The validator ties the data context and the model context together;
# the out-of-the-box validations below are all methods of this object.
model_validator = Validator(
    data=data_context,
    model=model_context,
)

## Use the trubrics validator to create out-of-the-box validations

In [9]:
# A single hand-written passenger record used for the edge-case validations.
edge_case = dict(
    Sex="male",
    Embarked="S",
    Title="Master",
    Pclass=2,
    Age=28,
    SibSp=0,
    Parch=0,
    Fare=37,
)

In [10]:
# Run the same edge case against both possible outputs:
# desired_output=0 is the example of a fail, desired_output=1 the example of a pass.
robustness = [
    model_validator.validate_single_edge_case(edge_case_data=edge_case, desired_output=desired_output)
    for desired_output in (0, 1)
]

# The severity of a validation can be changed after it has been created.
robustness[0].severity = "warning"
robustness[1].severity = "experiment"

robustness[0].dict()

{'validation_type': 'validate_single_edge_case',
 'validation_kwargs': {'args': [],
  'kwargs': {'edge_case_data': {'Sex': 'male',
    'Embarked': 'S',
    'Title': 'Master',
    'Pclass': 2,
    'Age': 28,
    'SibSp': 0,
    'Parch': 0,
    'Fare': 37},
   'desired_output': 0}},
 'outcome': 'fail',
 'result': {'prediction': '1'}}

In [11]:
# Check the model's score on the test set against two different thresholds.
performance = [
    model_validator.validate_performance_against_threshold(threshold=threshold)
    for threshold in (0.8, 0.75)
]

performance[0].dict()

{'validation_type': 'validate_performance_against_threshold',
 'validation_kwargs': {'args': [], 'kwargs': {'threshold': 0.8}},
 'outcome': 'fail',
 'severity': 'error',
 'result': {'performance': '0.7966101694915254'}}

In [12]:
# One bias check per categorical column, each with its own tolerated threshold.
fairness = [
    model_validator.validate_biased_performance_across_category(category=category, threshold=threshold)
    for category, threshold in (("Embarked", 0.1), ("Sex", 0.05))
]

fairness[0].dict()

{'validation_type': 'validate_biased_performance_across_category',
 'validation_kwargs': {'args': [],
  'kwargs': {'category': 'Embarked', 'threshold': 0.1}},
 'outcome': 'fail',
 'severity': 'error',
 'result': {'max_performance_difference': '0.15235267245317496'}}

In [13]:
# Check that each feature ranks within the given top-n of the stored feature importances.
explainability = [
    model_validator.validate_feature_in_top_n_important_features(
        feature=feature,
        feature_importance=feature_importance,
        top_n_features=top_n_features,
    )
    for feature, top_n_features in (("Sex_female", 3), ("Age", 2))
]

explainability[0].dict()

{'validation_type': 'validate_feature_in_top_n_important_features',
 'validation_kwargs': {'args': [],
  'kwargs': {'feature': 'Sex_female',
   'feature_importance': {'Pclass': 0.08027485982957666,
    'Age': 0.2109814530574326,
    'SibSp': 0.0537446367930372,
    'Parch': 0.036290576979770735,
    'Fare': 0.23012234475361906,
    'Sex_female': 0.0923012684295021,
    'Sex_male': 0.08321571192982573,
    'Embarked_C': 0.01263434518233411,
    'Embarked_Q': 0.007338584769864529,
    'Embarked_S': 0.015720887116309576,
    'Title_Capt': 0.000784360481368511,
    'Title_Col': 0.0012917613604970604,
    'Title_Dr': 0.0017767470075199022,
    'Title_Major': 0.0007732933748357258,
    'Title_Master': 0.009336584354394672,
    'Title_Miss': 0.02496022744830678,
    'Title_Mlle': 0.00014551965218339622,
    'Title_Mme': 8.535494893915394e-05,
    'Title_Mr': 0.11288027520793507,
    'Title_Mrs': 0.021862741975250895,
    'Title_Ms': 0.00027664450523918956,
    'Title_Rev': 0.00231564220337665

## Collect user feedback from app

In [14]:
from trubrics.utils.loader import get_business_feedback_data
# Read back the feedback that was saved from the streamlit app and display it.
# Only the loading call is guarded: a FileNotFoundError here means no feedback
# has been generated yet.
try:
    data = get_business_feedback_data(tracking=False)
except FileNotFoundError:
    print("Please generate feedback from the streamlit app in order to read it back here")
else:
    # Rendered only when loading succeeded, so a display problem is never
    # mistaken for missing feedback.
    display(data)

{'feedback_type': 'Other',
 'metadata': {'description': 'model seems to have more errors for passengers who paid less - is this normal?',
  'what_if_input': {'Sex': {'0': 'male'},
   'Embarked': {'0': 'S'},
   'Title': {'0': 'Master'},
   'Pclass': {'0': 2},
   'Age': {'0': 28},
   'SibSp': {'0': 0},
   'Parch': {'0': 0},
   'Fare': {'0': 37}}}}

--> **DS response: "It isn't normal, the model should not be more accurate for different groups of people. I'll add a test for this."**

## Create custom validations

In [15]:
from trubrics.validators.validation_output import validation_output, validation_output_type


class CustomValidator(Validator):
    """Validator extended with a custom check on errors across ticket fares.

    Shows how to add a project-specific validation next to the built-in ones:
    subclass ``Validator`` and expose a method decorated with ``@validation_output``.
    """

    def __init__(self, data: DataContext, model: ModelContext):
        super().__init__(data, model)

    @validation_output
    def validate_performance_for_different_fares(self, fare_cutoff: int = 50):
        """Public, decorated entry point for the fare validation.

        Parameters
        ----------
        fare_cutoff
            Fare value that splits the errors into a low-fare (``<=``) and a
            high-fare (``>``) group. Defaults to 50, the same default as the
            undecorated implementation.
        """
        return self._validate_performance_for_different_fares(fare_cutoff)

    def _validate_performance_for_different_fares(self, fare_cutoff: int = 50) -> validation_output_type:
        """
        Compare the number of test-set errors at or below ``fare_cutoff`` with
        the number of errors above it.

        Returns
        -------
            A tuple ``(outcome, result)``: ``outcome`` is True when the ratio
            low-fare errors / high-fare errors lies strictly between 0.5 and 1.5,
            and ``result`` holds that ratio rounded to 3 decimals.

        Notes
        -----
            This method is separated from validate_performance_for_different_fares
            to apply @validation_output and for unit testing.

            The @validation_output decorator allows you to generate a Validation object,
            and must be used to be able to save your validation as part of a Trubric.
            This decorator requires you to return values with the same type as validation_output_type.

            When there are no errors above the cutoff the ratio is undefined; it is
            then reported as ``inf`` (or ``nan`` when there are no errors at all) and
            the validation fails instead of raising ZeroDivisionError.
        """
        # presumably the test-set rows the model got wrong; it must expose a "Fare" column
        errors_df = self.trubrics_model.explore_test_set_errors()
        errors_at_or_below_cutoff = int((errors_df["Fare"] <= fare_cutoff).sum())
        errors_above_cutoff = int((errors_df["Fare"] > fare_cutoff).sum())

        if errors_above_cutoff == 0:
            # Avoid dividing by zero: inf / nan never fall inside the accepted band.
            number_of_errors_by_fare_ratio = float("inf") if errors_at_or_below_cutoff else float("nan")
        else:
            number_of_errors_by_fare_ratio = errors_at_or_below_cutoff / errors_above_cutoff

        return (
            0.5 < number_of_errors_by_fare_ratio < 1.5,
            {"number_of_errors_by_fare_ratio": round(number_of_errors_by_fare_ratio, 3)}
        )

In [16]:
# Same contexts as before, but wrapped in the subclass that adds the custom fare validation.
model_custom_validator = CustomValidator(
    data=data_context,
    model=model_context,
)

In [17]:
# Run the custom validation with a fare cutoff of 25.
custom = [
    model_custom_validator.validate_performance_for_different_fares(fare_cutoff=25),
]

# As with the built-in validations, the severity can be changed afterwards.
custom[0].severity = "warning"

custom[0].dict()

{'validation_type': 'validate_performance_for_different_fares',
 'validation_kwargs': {'args': [], 'kwargs': {'fare_cutoff': 25}},
 'outcome': 'pass',
 'result': {'number_of_errors_by_fare_ratio': '1.308'}}

## Save trubric

In [18]:
from trubrics.context import TrubricContext

# Collect every validation created above into one flat list, keeping the order
# robustness -> performance -> fairness -> explainability -> custom.
validations = [*robustness, *performance, *fairness, *explainability, *custom]

# The trubric records which model / data context (name and version) the validations belong to.
trubric_context = TrubricContext(
    name="my_first_trubric",
    model_context_name=model_context.name,
    model_context_version=model_context.version,
    data_context_name=data_context.name,
    data_context_version=data_context.version,
    validations=validations,
)

# Persist the trubric locally; it is read back from "../data/my_first_trubric.json" further down.
trubric_context.save_local(path="../data")

In [19]:
# or save to trubrics UI
# Best effort: the notebook must keep running when the API is not reachable.
try:
    trubric_context.save_ui(local_port=8000)
except Exception:
    # `except Exception` rather than a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate.
    print("Looks like the Trubrics API is not currently deployed.")

Looks like the Trubrics API is not currently deployed.


In [20]:
# Show the trubric as a plain dict: its name, the model / data context names and
# versions, and the list of validations.
trubric_context.dict()

{'name': 'my_first_trubric',
 'model_context_name': 'my_first_model',
 'model_context_version': 0.1,
 'data_context_name': 'my_first_dataset',
 'data_context_version': 0.1,
 'metadata': None,
 'validations': [{'validation_type': 'validate_single_edge_case',
   'validation_kwargs': {'args': [],
    'kwargs': {'edge_case_data': {'Sex': 'male',
      'Embarked': 'S',
      'Title': 'Master',
      'Pclass': 2,
      'Age': 28,
      'SibSp': 0,
      'Parch': 0,
      'Fare': 37},
     'desired_output': 0}},
   'outcome': 'fail',
   'result': {'prediction': '1'}},
  {'validation_type': 'validate_single_edge_case',
   'validation_kwargs': {'args': [],
    'kwargs': {'edge_case_data': {'Sex': 'male',
      'Embarked': 'S',
      'Title': 'Master',
      'Pclass': 2,
      'Age': 28,
      'SibSp': 0,
      'Parch': 0,
      'Fare': 37},
     'desired_output': 1}},
   'outcome': 'pass',
   'severity': 'experiment',
   'result': {'prediction': '1'}},
  {'validation_type': 'validate_performanc

## Execute trubric from file

In [21]:
from trubrics.validators.run import run_trubric

In [22]:
# Read the trubric back from the JSON file that was saved locally above.
trubric = TrubricContext.parse_file("../data/my_first_trubric.json")

# Re-run the validations stored in the trubric against the current contexts.
# The custom validator is passed too, so the trubric's custom validation can be resolved.
all_validation_results = run_trubric(
    trubric=trubric,
    data_context=data_context,
    model_context=model_context,
    custom_validator=model_custom_validator,
)

In [23]:
# Print one line per validation result.
for result in all_validation_results:
    print(result)

('validate_single_edge_case', 'experiment', 'pass')
('validate_performance_against_threshold', 'error', 'fail')
('validate_performance_against_threshold', 'error', 'pass')
('validate_biased_performance_across_category', 'error', 'fail')
('validate_biased_performance_across_category', 'error', 'pass')
('validate_feature_in_top_n_important_features', 'error', 'fail')
('validate_feature_in_top_n_important_features', 'error', 'pass')
