Starting with the basic pandas dataframe logging, consider the following input dataframe and its whylogs profile

In [1]:
import pandas as pd
import whylogs as ylog

data = {
    "animal": ["cat", "hawk", "snake", "cat", "mosquito"],
    "legs": [4, 2, 0, 4, 6],
    "weight": [4.3, 1.8, 1.3, 4.1, 5.5e-6],
}

results = ylog.log(pd.DataFrame(data))
profile = results.profile()

In [2]:
profile_view = profile.view()
profile_view.to_pandas()

Unnamed: 0_level_0,counts/n,counts/null,types/integral,types/fractional,types/boolean,types/string,types/object,cardinality/est,cardinality/upper_1,cardinality/lower_1,...,distribution/n,distribution/max,distribution/min,distribution/q_10,distribution/q_25,distribution/median,distribution/q_75,distribution/q_90,ints/max,ints/min
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
animal,5,0,0,0,0,5,0,4.0,4.0002,4.0,...,,,,,,,,,,
legs,5,0,5,0,0,0,0,4.0,4.0002,4.0,...,5.0,6.0,0.0,0.0,2.0,4.0,4.0,6.0,6.0,0.0
weight,5,0,0,5,0,0,0,5.0,5.00025,5.0,...,5.0,4.3,5e-06,5e-06,1.3,1.8,4.1,4.3,,


import a constraint for the Distribution metric and define upper bound on the "legs" column

In [5]:
column_view = profile_view.get_column("legs")

# constraint session bound to profile_view
builder = profile_view.constraint_builder() # consider if we can bind this to result_set (probably too much complexity)
builder.add_constraint(column_name="legs", Constraint(upper_bound = 6))
# object that implicitly points to the object from metric: import numeric_constraints
# numeric_constraints.bound(upper_bound = 6)
# numeric_constraints.bound(p90 < 0.23)

# uniqueness_constraints.bound(range)

# constraint interface? ABC involves projection of data to app form for the constraint check, inputs: params (or lambda)
# enum for mean, stddev
# string -> top_frequent item, see MV3.

builder.add_constraint(table_shape=(100,1000))

constraint_builder_result = builder.execute() # with lambda, might be expensive
constraint_builder_result2 = builder.execute() # might constraint_builder_result2 != constraint_builder_result
constraint_builder_result.to_pandas() # [name, pass, fail]

constraints = constraint_builder_result.get_constraint() #  can apply while logging?
constraint_builder_result.html_constraints_report()

print(column_view.to_summary_dict())
column_view.add_constraint(lower_bound=0)
column_view.add_constraint(upper_bound=6)
column_view.validate()

{'animal': <whylogs.core.view.column_profile_view.ColumnProfileView at 0x7f03526336d0>,
 'legs': <whylogs.core.view.column_profile_view.ColumnProfileView at 0x7f0352630bb0>,
 'weight': <whylogs.core.view.column_profile_view.ColumnProfileView at 0x7f039271db50>}

In [3]:
from whylogs.core.constraints import ColumnDistributionConstraints
from whylogs.viz import NotebookProfileVisualizer

visualization = NotebookProfileVisualizer()
# visualization.set_profiles(target_profile_view=profile_view)

distribution_contraints = ColumnDistributionConstraints(columnName="legs")

# mammals, birds, reptiles are expected to have 4 legs or less
distribution_contraints.add_constraint(quantile=1.0, upper_threshold=4)
validation_results = distribution_contraints.validate(profile_view)
print(validation_results)


ConstraintReport(name='DistributionConstraints[legs]', passed_count=0, failed_count=1)


In [4]:

# Can also generate an html report to vizualize the above instead of generating a report
distribution_contraints.constraints_report(profile_view)

# negative leg counts don't make sense, so define a lower bound
distribution_contraints.add_constraint(lower_threshold=0)

# ok, let's also include insects as expected 4 legs or less
distribution_contraints.add_constraint(upper_threshold=6)
visualization.constraints_report(constraints=distribution_contraints)




AttributeError: 'ColumnDistributionConstraints' object has no attribute 'constraints_report'