In [1]:
#! pip install trubrics

## Load titanic data

In [2]:
from trubrics.example import get_titanic_data_and_model
# The helper returns three values; the second one is not needed in this notebook and is discarded.
titanic_dataset, _, model = get_titanic_data_and_model()

## Init DataContext

In [3]:
from trubrics.context import DataContext

In [4]:
data_context = DataContext(
    name="my_first_dataset",
    version=0.1,
    # For assortment, this can just be your entire dataset.
    testing_data=titanic_dataset,
    # For assortment, this is your scoring column. If you have multiple scores to
    # validate, create one data context per score. Create this column even when you
    # have no ground truth (it can simply be filled with zeros).
    target="Survived"
)

## Custom model / algorithm

In [5]:
class ExampleCustomModel:
    """Custom model wrapper that scores passengers by their probability of survival.

    The wrapped `model` must expose `predict_proba`; `predict` returns column 1 of
    that output as the per-row score.
    """

    def __init__(self, model):
        # Declared as a regressor because `predict` returns continuous scores
        # rather than class labels.
        self._estimator_type = "regressor"
        self.model = model

    def predict(self, df):
        """Return one score per row of `df`: column 1 of `model.predict_proba(df)`."""
        class_probabilities = self.model.predict_proba(df)
        survival_scores = class_probabilities[:, 1]
        return survival_scores

In [6]:
# Example: scores produced by the custom model for the first 5 rows of the test features
ExampleCustomModel(model=model).predict(data_context.X_test)[:5]

array([0.99      , 0.07      , 0.06733333, 0.07      , 0.98      ])

In [7]:
# Wrap the trained model; this wrapped instance is what the custom validator receives.
custom_model = ExampleCustomModel(model=model)

## Build custom validations

Let's build the following custom validations:

**Data validations**
- validate that passengers with the title "master" are younger than a certain age

**Model validations**
- validate that the model scores are on average higher for females than for males

In [8]:
from trubrics.validations import ModelValidator

In [9]:
from trubrics.validations.validation_output import (
    validation_output,
    validation_output_type,
)

class CustomValidator(ModelValidator):
    """Custom validations for the Titanic example: one data check and one model check.

    Each public `validate_*` method is decorated with `@validation_output` and
    delegates to a private `_validate_*` method that returns a
    `(passed, result_dict)` tuple.
    """

    def __init__(self, data: DataContext, model, slicing_functions=None):
        """Store the data context and the model used by the validations.

        Args:
            data: data context whose `testing_data` and `X_test` are validated.
            model: object exposing `predict`, used to score `data.X_test`.
            slicing_functions: accepted but not used by this validator.
        """
        # NOTE(review): ModelValidator.__init__ is not called here — confirm this is intended.
        self.data = data
        self.model = model

    def _validate_master_age(self, age_limit_master):
        """Collect "Master" passengers whose age is at or above `age_limit_master`.

        Returns:
            Tuple of (True when no offending rows exist, {"errors_df": offending rows as a dict}).
        """
        master_df = self.data.testing_data.loc[lambda df: df["Title"]=="Master"]
        errors_df = master_df.loc[lambda df: df["Age"] >= age_limit_master]
        return len(errors_df) == 0, {"errors_df": errors_df.to_dict()}

    @validation_output
    def validate_master_age(self, age_limit_master: int, severity=None):
        """Validate that passengers with the title "master" are younger than a certain age

        Args:
            age_limit_master: cut off value for master
            severity: not read in this method body; presumably consumed by
                `@validation_output` — TODO confirm.

        Returns:
            True for success, false otherwise. With a results dictionary giving dict of errors.
        """
        return self._validate_master_age(age_limit_master)

    def _validate_model_scores_females_higher(self):
        """Compare the mean model score of female passengers with that of male passengers.

        Returns:
            Tuple of (True when the female mean is strictly higher,
            {"score_female": ..., "score_male": ...}), with means rounded to 3 decimals.
        """
        # Score the validator's own data context rather than a notebook-level global,
        # so predictions always line up with `self.data.testing_data`.
        predictions_df = self.data.testing_data.assign(predictions=self.model.predict(self.data.X_test))

        def _average_score_sex(sex):
            return round(predictions_df.loc[predictions_df["Sex"]==sex, "predictions"].mean(), 3)

        score_female = _average_score_sex(sex="female")
        score_male = _average_score_sex(sex="male")
        return score_female > score_male, {"score_female": score_female, "score_male": score_male}

    @validation_output
    def validate_model_scores_females_higher(self, severity=None):
        """We want the model to score female passengers with a higher probability of survival,
        so we are validating the average scores are higher for females than for males.

        Args:
            severity: not read in this method body; presumably consumed by
                `@validation_output` — TODO confirm.

        Returns:
            True for success, false otherwise. With a results dictionary giving mean scores for both populations.
        """
        return self._validate_model_scores_females_higher()

In [10]:
# Build the validator from the data context and the wrapped scoring model defined above.
validator = CustomValidator(data=data_context, model=custom_model)

In [11]:
# Run each validation once, in order, then collect the results for the trubric.
master_age_validation = validator.validate_master_age(age_limit_master=13)
females_higher_validation = validator.validate_model_scores_females_higher(severity="experiment")
validations = [master_age_validation, females_higher_validation]

## Save validations as a trubric

In [12]:
from trubrics.validations import Trubric

# Bundle the validation results with the model and data-context identifiers under one named trubric.
trubric = Trubric(
    trubric_name="my_first_trubric",
    model_name="my_model",
    model_version=0.1,
    # Reuse the name / version declared on the data context so both stay in sync.
    data_context_name=data_context.name,
    data_context_version=data_context.version,
    metadata={"tag": "master"}, # tag any metadata here
    validations=validations,
)

In [13]:
# Save the trubric as a local .json file in the current directory
# (the log line below reports the resulting file name).
trubric.save_local(path=".")

2022-10-25 17:17:49.650 | INFO     | trubrics.validations.dataclass:save_local:107 - Trubric saved to my_first_trubric.json.


## Run trubric from CI/CD/CT

The last step is to run the validations from the trubric .json against new data / models every time there is a potential change. See our [CLI docs](https://trubrics.github.io/trubrics-sdk/trubrics_cli/) for more information.