Skip to content

Commit

Permalink
Total executions and failures included in value constraints proto mes…
Browse files Browse the repository at this point in the history
…sages (#450)

* Add total executions and failures in ValueConstraintMsg

* Edit from_protobuf to use the total executions and failures

addresses #445

Co-authored-by: pecop2 <petar@loka.com>
  • Loading branch information
pecop2 and pecop2 committed Feb 22, 2022
1 parent 788b4be commit 224230c
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 2 deletions.
4 changes: 4 additions & 0 deletions proto/src/constraints.proto
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ message ValueConstraintMsg {
}
Op op = 3;
bool verbose = 4;
uint32 total = 8;
uint32 failures = 9;
}

message MultiColumnValueConstraintMsg {
Expand All @@ -112,6 +114,8 @@ message MultiColumnValueConstraintMsg {
Op op = 4;
bool verbose = 5;
Op internal_dependent_columns_op = 9;
uint32 total = 10;
uint32 failures = 11;
}

message ValueConstraintMsgs {
Expand Down
16 changes: 14 additions & 2 deletions src/whylogs/core/statistics/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,11 @@ def from_protobuf(msg: ValueConstraintMsg) -> "ValueConstraint":
else:
val = msg.value

return ValueConstraint(msg.op, value=val, regex_pattern=regex_pattern, apply_function=apply_function, name=name, verbose=msg.verbose)
constraint = ValueConstraint(msg.op, value=val, regex_pattern=regex_pattern, apply_function=apply_function, name=name, verbose=msg.verbose)
constraint.total = msg.total
constraint.failures = msg.failures

return constraint

def to_protobuf(self) -> ValueConstraintMsg:
set_vals_message = None
Expand Down Expand Up @@ -381,6 +385,8 @@ def to_protobuf(self) -> ValueConstraintMsg:
regex_pattern=regex_pattern,
function=apply_func,
verbose=self._verbose,
total=self.total,
failures=self.failures,
)

def report(self):
Expand Down Expand Up @@ -1201,9 +1207,13 @@ def from_protobuf(msg: MultiColumnValueConstraintMsg) -> "MultiColumnValueConstr
ref_cols = "all"
else:
raise ValueError("MultiColumnValueConstraintMsg should contain one of the attributes: value_set, value or reference_columns, but none were found")
return MultiColumnValueConstraint(
mcv_constraint = MultiColumnValueConstraint(
dependent_cols, msg.op, value=value, reference_columns=ref_cols, name=name, internal_dependent_cols_op=internal_op, verbose=msg.verbose
)
mcv_constraint.total = msg.total
mcv_constraint.failures = msg.failures

return mcv_constraint

def to_protobuf(self) -> MultiColumnValueConstraintMsg:
value = None
Expand Down Expand Up @@ -1251,6 +1261,8 @@ def to_protobuf(self) -> MultiColumnValueConstraintMsg:
reference_columns=ref_cols,
internal_dependent_columns_op=internal_op,
verbose=self._verbose,
total=self.total,
failures=self.failures,
)


Expand Down
29 changes: 29 additions & 0 deletions tests/unit/core/statistics/test_constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -2944,3 +2944,32 @@ def test_multicolumn_value_constraints_serialization_deserialization():
assert pytest.approx(sum_of_values["value"], 0.01) == 100
assert sum_of_values["internalDependentColumnsOp"] == Op.Name(Op.SUM)
assert sum_of_values["verbose"] is False


def test_value_constraints_executions_serialization(local_config_path, df_lending_club):
    """Check that ``total`` and ``failures`` survive a protobuf round trip.

    A 100-row dataframe is logged with one ``ValueConstraint`` on ``col1`` and
    one multi-column constraint comparing ``col1`` against ``col2``. For each
    constraint, the counters found in ``dc.report()``, in the serialized
    message and on the constraint rebuilt with ``from_protobuf`` must agree.

    NOTE(review): ``df_lending_club`` is not used in this test; it is kept in
    the signature so the requested fixtures stay unchanged -- confirm whether
    it can be dropped.
    """
    # col1 counts up 0..99 while col2 counts down 149..50.
    df = pd.DataFrame({"col1": list(range(100)), "col2": list(range(149, 49, -1))})

    val_constraint = ValueConstraint(Op.LT, 10)
    mc_val_constraint = columnValuesAGreaterThanBConstraint("col1", "col2")

    value_constraints = {"col1": [val_constraint]}
    dc = DatasetConstraints(None, value_constraints=value_constraints, multi_column_value_constraints=[mc_val_constraint])

    config = load_config(local_config_path)
    session = session_from_config(config)
    try:
        # Logging is only needed for its effect on the constraints' counters;
        # the returned profile is not inspected.
        session.log_dataframe(df, "test.data", constraints=dc)
    finally:
        # Close the session even when logging raises, so a failure here does
        # not leave it open for the tests that follow.
        session.close()

    report = dc.report()
    # NOTE(review): the indices below presume the value-constraint entry sits at
    # report[0][1][0] and the multi-column entry at report[1], each holding
    # total at index 1 and failures at index 2 -- verify against report().
    value_entry = report[0][1][0]
    mc_entry = report[1]

    # Only 0..9 of the 100 values in col1 are below 10, hence 90 failures.
    val_constraint_proto = val_constraint.to_protobuf()
    val_constraint_deser = ValueConstraint.from_protobuf(val_constraint_proto)
    assert value_entry[1] == val_constraint_proto.total == val_constraint_deser.total == 100
    assert value_entry[2] == val_constraint_proto.failures == val_constraint_deser.failures == 90

    # col1 exceeds col2 only for the last 25 rows (col1 >= 75), hence 75 failures.
    mc_val_constraint_proto = mc_val_constraint.to_protobuf()
    mc_val_constraint_deser = MultiColumnValueConstraint.from_protobuf(mc_val_constraint_proto)
    assert mc_entry[1] == mc_val_constraint_proto.total == mc_val_constraint_deser.total == 100
    assert mc_entry[2] == mc_val_constraint_proto.failures == mc_val_constraint_deser.failures == 75

0 comments on commit 224230c

Please sign in to comment.