In [1]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score
import json
from examples.training import config
from pathlib import Path

In [2]:
# Load the preprocessed train/test sets, the trained model and its feature importances.
# All paths are resolved against `relative_root`, i.e. relative to the notebook's working directory.
relative_root = "../.."
artifacts_root = Path(relative_root)
try:
    preprocessed_train = pd.read_csv(artifacts_root / config.LOCAL_TRAIN_FILENAME)
    preprocessed_test = pd.read_csv(artifacts_root / config.LOCAL_TEST_FILENAME)
    # NOTE(review): joblib.load unpickles the file, so only load model files you trust.
    rf_model = joblib.load(artifacts_root / config.LOCAL_MODEL_FILENAME)
    with open(artifacts_root / config.LOCAL_FI_FILENAME, "r", encoding="utf-8") as file:
        feature_importance = json.load(file)
except FileNotFoundError as missing_file_error:
    # Chain the original error so the traceback still names the file that is missing.
    raise FileNotFoundError("To generate these files, run `make train-titanic`") from missing_file_error

## Init trubrics model

In [3]:
from trubrics.context import DataContext, ModelContext

In [4]:
# Data side: the train/test frames loaded above plus the name of the target column.
data_context = DataContext(
    training_data=preprocessed_train,
    testing_data=preprocessed_test,
    target_column=config.TARGET,
    name="my_datasource",
)

# Model side: the loaded estimator and the metric passed as its evaluation function.
model_context = ModelContext(
    estimator=rf_model,
    evaluation_function=accuracy_score,
    name="my_model",
    version="0.1",
)

In [5]:
from trubrics.validators.base import Validator

In [6]:
# Bind the data and model contexts to the validator used by the cells below.
model_validator = Validator(
    model=model_context,
    data=data_context,
)

## Data scientist (DS) example tests

In [7]:
# A single hand-written passenger record used as an edge case by the robustness validations.
edge_case = dict(
    Sex="male",
    Embarked="S",
    Title="Master",
    Pclass=2,
    Age=28,
    SibSp=0,
    Parch=0,
    Fare=37,
)

In [8]:
# Validate the same edge case against both possible labels. In the recorded output the
# model predicts 1, so desired_output=0 is the example of a fail and desired_output=1
# the example of a pass.
robustness = [
    model_validator.validate_single_edge_case(edge_case_data=edge_case, desired_output=expected_label)
    for expected_label in (0, 1)
]
robustness[0].dict()

{'validation_type': 'validate_single_edge_case',
 'validation_kwargs': {'args': [],
  'kwargs': {'edge_case_data': {'Sex': 'male',
    'Embarked': 'S',
    'Title': 'Master',
    'Pclass': 2,
    'Age': 28,
    'SibSp': 0,
    'Parch': 0,
    'Fare': 37},
   'desired_output': 0}},
 'outcome': 'fail',
 'result': {'prediction': '1'}}

In [9]:
# Run the performance validation once per threshold, in the order 0.8 then 0.75.
performance = [
    model_validator.validate_performance_against_threshold(threshold=score_threshold)
    for score_threshold in (0.8, 0.75)
]
performance[0].dict()

{'validation_type': 'validate_performance_against_threshold',
 'validation_kwargs': {'args': [], 'kwargs': {'threshold': 0.8}},
 'outcome': 'fail',
 'result': {'performance': '0.7966101694915254'}}

In [10]:
# Run the bias validation per (category column, threshold) pair.
fairness = [
    model_validator.validate_biased_performance_across_category(category=column, threshold=bias_threshold)
    for column, bias_threshold in (("Embarked", 0.1), ("Sex", 0.05))
]
fairness[0].dict()

{'validation_type': 'validate_biased_performance_across_category',
 'validation_kwargs': {'args': [],
  'kwargs': {'category': 'Embarked', 'threshold': 0.1}},
 'outcome': 'fail',
 'result': {'max_performance_difference': '0.15235267245317496'}}

In [11]:
# Check, per (feature, n) pair, whether the feature is among the top n entries of the
# feature importances loaded from disk.
explainability = [
    model_validator.validate_feature_in_top_n_important_features(
        feature=feature_name,
        feature_importance=feature_importance,
        top_n_features=top_n,
    )
    for feature_name, top_n in (("Sex_female", 3), ("Age", 2))
]
explainability[0].dict()

{'validation_type': 'validate_feature_in_top_n_important_features',
 'validation_kwargs': {'args': [],
  'kwargs': {'feature': 'Sex_female',
   'feature_importance': {'Pclass': 0.08027485982957666,
    'Age': 0.2109814530574326,
    'SibSp': 0.0537446367930372,
    'Parch': 0.036290576979770735,
    'Fare': 0.23012234475361906,
    'Sex_female': 0.0923012684295021,
    'Sex_male': 0.08321571192982573,
    'Embarked_C': 0.01263434518233411,
    'Embarked_Q': 0.007338584769864529,
    'Embarked_S': 0.015720887116309576,
    'Title_Capt': 0.000784360481368511,
    'Title_Col': 0.0012917613604970604,
    'Title_Dr': 0.0017767470075199022,
    'Title_Major': 0.0007732933748357258,
    'Title_Master': 0.009336584354394672,
    'Title_Miss': 0.02496022744830678,
    'Title_Mlle': 0.00014551965218339622,
    'Title_Mme': 8.535494893915394e-05,
    'Title_Mr': 0.11288027520793507,
    'Title_Mrs': 0.021862741975250895,
    'Title_Ms': 0.00027664450523918956,
    'Title_Rev': 0.00231564220337665

## Business user example test

In [12]:
from trubrics.utils.loader import get_business_feedback_data
# Read back the feedback recorded from the streamlit app and display it.
try:
    data = get_business_feedback_data(tracking=False)
except FileNotFoundError:
    print("Please generate feedback from the streamlit app in order to read it back here")
else:
    # Displaying happens outside the `try` so that the handler above only covers
    # a missing feedback file reported by the loader itself.
    display(data)

{'feedback_type': 'Other',
 'metadata': {'description': 'model seems to have more errors for passengers who paid less - is this normal?',
  'what_if_input': {'Sex': {'0': 'male'},
   'Embarked': {'0': 'S'},
   'Title': {'0': 'Master'},
   'Pclass': {'0': 2},
   'Age': {'0': 28},
   'SibSp': {'0': 0},
   'Parch': {'0': 0},
   'Fare': {'0': 37}}}}

--> **DS response: "It isn't normal: the model should not be noticeably more accurate for some groups of passengers than for others. I'll add a test for this."**

In [13]:
from trubrics.base import BaseClassifier
from trubrics.validators.validation_output import validation_output, validation_output_type


class CustomValidator(BaseClassifier):
    """Validator that adds a project-specific fare check on top of the BaseClassifier methods."""

    def __init__(self, **kwargs):
        """
        Initialise your class with BaseClassifier or BaseRegressor to use their methods
        on your data and model contexts.
        """
        super().__init__(**kwargs)

    @validation_output
    def validate_performance_for_different_fares(self, fare_cutoff: int = 50) -> validation_output_type:
        """
        Check that the number of test-set errors is balanced on either side of a fare cutoff.

        The rows returned by `explore_test_set_errors()` are split into those with
        `Fare <= fare_cutoff` and those with `Fare > fare_cutoff`. The validation passes
        when the ratio of the two counts (low / high) lies strictly between 0.5 and 1.5.

        Parameters
        ----------
        fare_cutoff
            Fare value separating the low-fare group (inclusive) from the high-fare group.

        Returns
        -------
        validation_output_type
            A `(passed, result)` tuple, where `result` stores the ratio under the key
            `number_of_errors_by_fare_ratio`.

        Notes
        -----
            The @validation_output decorator allows you to generate a Validation object,
            and must be used to be able to save your validation as part of a Trubric.
            This decorator requires you to return values with the same type as validation_output_type.

            When no error lies above the cutoff the ratio cannot be computed. In that case
            the validation passes with a reported ratio of 1.0 if there are no errors in the
            low-fare group either, and fails with a reported ratio of infinity otherwise.
        """
        errors_df = self.explore_test_set_errors()
        low_fare_errors = errors_df.loc[lambda x: x["Fare"] <= fare_cutoff].shape[0]
        high_fare_errors = errors_df.loc[lambda x: x["Fare"] > fare_cutoff].shape[0]

        if high_fare_errors == 0:
            # Dividing by an empty high-fare group would raise ZeroDivisionError, so
            # handle it explicitly instead of crashing the whole validation run.
            no_errors_at_all = low_fare_errors == 0
            return (
                no_errors_at_all,
                {"number_of_errors_by_fare_ratio": 1.0 if no_errors_at_all else float("inf")},
            )

        number_of_errors_by_fare_ratio = low_fare_errors / high_fare_errors
        return (
            0.5 < number_of_errors_by_fare_ratio < 1.5,
            {"number_of_errors_by_fare_ratio": round(number_of_errors_by_fare_ratio, 3)},
        )

In [14]:
# Build the custom validator from the same data and model contexts as the standard one.
model_custom_validator = CustomValidator(
    model=model_context,
    data=data_context,
)

In [15]:
# Run the custom fare validation with a cutoff of 25 and keep it in a list,
# like the other validation groups, so it can be concatenated with them later.
fare_validation = model_custom_validator.validate_performance_for_different_fares(fare_cutoff=25)
custom = [fare_validation]
custom[0]

ValidationContext(validation_type='validate_performance_for_different_fares', validation_kwargs={'args': [], 'kwargs': {'fare_cutoff': 25}}, outcome='pass', result={'number_of_errors_by_fare_ratio': '1.308'})

## Save Trubric

In [16]:
from trubrics.context import TrubricContext

# Flatten every validation group produced above into a single list, keeping their order.
validations = [*robustness, *performance, *fairness, *explainability, *custom]

# Bundle both contexts with the validations, then save the result under ../data.
trubric = TrubricContext(
    model_context=model_context,
    data_context=data_context,
    validations=validations,
)
trubric.save(path="../data")