In [8]:
from typing import Any
from whylogs.core.relations import Not, Predicate
import pandas as pd

# Synthetic column: every letter of "abcd" paired with 0..99, rendered as
# "<letter>.<number>" ("a.0", "a.1", ..., "d.99") - 400 strings in total.
text_data = dict(
    transcriptions=[
        f"{prefix}.{index}"
        for prefix in "abcd"
        for index in range(100)
    ],
)

# Single-column frame that the validator cell logs.
df = pd.DataFrame(text_data)

def do_something_important(validator_name, condition_name: str, value: Any):
    """Print a two-line report for a value that failed a validator condition.

    This notebook passes the function in the ``actions`` list of a
    ``ConditionValidator``.

    Args:
        validator_name: Name of the validator reporting the failure.
        condition_name: Name of the condition that failed.
        value: The offending value; rendered with its default formatting.

    Returns:
        None. The only effect is the message written to stdout.
    """
    report = f"Validator: {validator_name}\n    Condition name {condition_name} failed for value {value}"
    print(report)

# Base predicate object; the regex check below is built from it via `.matches(...)`.
X = Predicate()

# Conditions keyed by name. "even_a" wraps the regex match in Not(...), so the
# values reported as failures are the ones that DO match: strings starting with
# "a" and ending in an even digit (e.g. "a.0", "a.2").
transcriptions_conditions = dict(
    even_a=Not(X.matches("^a.*[02468]$")),
)

In [14]:
from whylogs.core.validators import ConditionValidator, ConditionValidatorConfig
from whylogs.core.schema import DatasetSchema
import whylogs as why
import pandas as pd

# Validator configuration: sampling size of 10 for the validator's sample of
# failed values.
condition_count_config = ConditionValidatorConfig(validator_sampling_size=10)

# Validator that checks `transcriptions_conditions` and calls
# `do_something_important` as its action; sampling is switched on so that
# failed values can be retrieved at the end of the cell.
transcription_validator = ConditionValidator(
    name="even_a",
    conditions=transcriptions_conditions,
    actions=[do_something_important],
    enable_sampling=True,
)

# Column name -> validators attached to that column.
validators = {"transcriptions": [transcription_validator]}

schema = DatasetSchema(
    validators=validators,
    default_configs=condition_count_config,
)

# Logging the frame is what runs the validator over the column.
profile = why.log(df, schema=schema).profile()

# Last expression of the cell: displays the sampled failed values.
transcription_validator.sample_failed_conditions()

Validator: even_a
    Condition name even_a failed for value a.0
Validator: even_a
    Condition name even_a failed for value a.2
Validator: even_a
    Condition name even_a failed for value a.4
Validator: even_a
    Condition name even_a failed for value a.6
Validator: even_a
    Condition name even_a failed for value a.8
Validator: even_a
    Condition name even_a failed for value a.10
Validator: even_a
    Condition name even_a failed for value a.12
Validator: even_a
    Condition name even_a failed for value a.14
Validator: even_a
    Condition name even_a failed for value a.16
Validator: even_a
    Condition name even_a failed for value a.18
Validator: even_a
    Condition name even_a failed for value a.20
Validator: even_a
    Condition name even_a failed for value a.22
Validator: even_a
    Condition name even_a failed for value a.24
Validator: even_a
    Condition name even_a failed for value a.26
Validator: even_a
    Condition name even_a failed for value a.28
Validator: even

['a.48', 'a.2', 'a.40', 'a.30', 'a.92', 'a.10', 'a.32', 'a.68', 'a.46', 'a.34']

# Algorithm

```text
(* S has items to sample, R will contain the result *)
ReservoirSample(S[1..n], R[1..k])
  // fill the reservoir array
  for i = 1 to k
      R[i] := S[i]

  (* random() generates a uniform (0,1) random number *)
  W := exp(log(random())/k)

  while i <= n
      i := i + floor(log(random())/log(1-W)) + 1
      if i <= n
          (* replace a random item of the reservoir with item i *)
          R[randomInteger(1,k)] := S[i]  // random index between 1 and k, inclusive
          W := W * exp(log(random())/k)
```