Skip to content

Commit

Permalink
Add constraint mergeability (#345)
Browse files Browse the repository at this point in the history
  • Loading branch information
jamie256 committed Nov 2, 2021
1 parent 046ffa9 commit cedaa7c
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 19 deletions.
108 changes: 90 additions & 18 deletions src/whylogs/core/statistics/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,17 @@ def update(self, v) -> bool:
if self._verbose:
logger.info(f"value constraint {self.name} failed on value {v}")

def merge(self, other) -> "ValueConstraint":
    """Combine this constraint with ``other`` into a new ValueConstraint.

    A falsy ``other`` (e.g. ``None``) leaves nothing to merge, so ``self``
    is returned unchanged. Otherwise both constraints must share the same
    name, op and value; the result carries the summed ``total`` and
    ``failures`` counters of the two inputs.

    Raises:
        AssertionError: if name, op or value differ between the two.
    """
    if other:
        assert self.name == other.name, f"Cannot merge constraints with different names: ({self.name}) and ({other.name})"
        assert self.op == other.op, f"Cannot merge constraints with different ops: {self.op} and {other.op}"
        assert self.value == other.value, f"Cannot merge value constraints with different values: {self.value} and {other.value}"

        # Build a fresh constraint so neither input is mutated.
        combined = ValueConstraint(op=self.op, value=self.value, name=self.name, verbose=self._verbose)
        combined.total, combined.failures = self.total + other.total, self.failures + other.failures
        return combined
    return self

@staticmethod
def from_protobuf(msg: ValueConstraintMsg) -> "ValueConstraint":
    """Build a ValueConstraint from the op, value, name and verbose fields of ``msg``."""
    return ValueConstraint(
        op=msg.op,
        value=msg.value,
        name=msg.name,
        verbose=msg.verbose,
    )
Expand Down Expand Up @@ -206,6 +217,23 @@ def update(self, summ: NumberSummary) -> bool:
if self._verbose:
logger.info(f"summary constraint {self.name} failed")

def merge(self, other) -> "SummaryConstraint":
    """Combine this constraint with ``other`` into a new SummaryConstraint.

    A falsy ``other`` (e.g. ``None``) leaves nothing to merge, so ``self``
    is returned unchanged. Otherwise both constraints must agree on name,
    op, value, first_field and second_field; the result carries the summed
    ``total`` and ``failures`` counters of the two inputs.

    Raises:
        AssertionError: if any of the compared attributes differ.
    """
    if other:
        assert self.name == other.name, f"Cannot merge constraints with different names: ({self.name}) and ({other.name})"
        assert self.op == other.op, f"Cannot merge constraints with different ops: {self.op} and {other.op}"
        assert self.value == other.value, f"Cannot merge constraints with different values: {self.value} and {other.value}"
        assert self.first_field == other.first_field, f"Cannot merge constraints with different first_field: {self.first_field} and {other.first_field}"
        assert self.second_field == other.second_field, f"Cannot merge constraints with different second_field: {self.second_field} and {other.second_field}"

        # Build a fresh constraint so neither input is mutated.
        combined = SummaryConstraint(
            first_field=self.first_field,
            op=self.op,
            value=self.value,
            second_field=self.second_field,
            name=self.name,
            verbose=self._verbose,
        )
        combined.total, combined.failures = self.total + other.total, self.failures + other.failures
        return combined
    return self

@staticmethod
def from_protobuf(msg: SummaryConstraintMsg) -> "SummaryConstraint":
if msg.HasField("value") and not msg.HasField("second_field"):
Expand Down Expand Up @@ -251,60 +279,104 @@ def report(self):


class ValueConstraints:
    """Collection of value constraints keyed by constraint name.

    ``self.constraints`` is a mapping from constraint name to constraint.
    """

    def __init__(self, constraints: Optional[Mapping[str, "ValueConstraint"]] = None):
        """Store ``constraints`` as a name-keyed mapping.

        Args:
            constraints: mapping of name to constraint, a list of constraints
                (re-keyed by each constraint's ``name``), or ``None`` for an
                empty collection.
        """
        if constraints is None:
            constraints = dict()

        # Support list of constraints for back compat with previous version.
        if isinstance(constraints, list):
            self.constraints = {constraint.name: constraint for constraint in constraints}
        else:
            self.constraints = constraints

    @staticmethod
    def from_protobuf(msg: "ValueConstraintMsgs") -> Optional["ValueConstraints"]:
        """Deserialize ``msg.constraints``; returns ``None`` when the message holds none."""
        value_constraints = [ValueConstraint.from_protobuf(c) for c in msg.constraints]
        if len(value_constraints) > 0:
            return ValueConstraints({v.name: v for v in value_constraints})
        return None

    def __getitem__(self, name: str) -> Optional["ValueConstraint"]:
        """Return the constraint registered under ``name``, or ``None`` if absent."""
        # The attribute is spelled `constraints`; a misspelling here raises
        # AttributeError on every lookup.
        if self.constraints:
            return self.constraints.get(name)
        return None

    def to_protobuf(self) -> Optional["ValueConstraintMsgs"]:
        """Serialize every constraint; returns ``None`` when the collection is empty."""
        v = [c.to_protobuf() for c in self.constraints.values()]
        if len(v) > 0:
            vcmsg = ValueConstraintMsgs()
            vcmsg.constraints.extend(v)
            return vcmsg
        return None

    def update(self, v):
        """Forward ``v`` to the ``update`` method of every constraint."""
        for c in self.constraints.values():
            c.update(v)

    def merge(self, other) -> "ValueConstraints":
        """Merge with ``other`` and return the combined collection.

        Returns ``self`` when ``other`` is falsy or has no constraints.
        Otherwise the result starts from a copy of ``other``'s mapping, and
        every constraint of ``self`` is merged with the same-named entry of
        ``other`` (or with ``None`` when ``other`` has no such entry).
        """
        if not other or not other.constraints:
            return self

        merged_constraints = dict(other.constraints)
        # Iterate (name, constraint) pairs; iterating the mapping itself
        # yields only the keys and breaks the tuple unpacking.
        for name, constraint in self.constraints.items():
            merged_constraints[name] = constraint.merge(other.constraints.get(name))

        return ValueConstraints(merged_constraints)

    def report(self) -> Optional[List[tuple]]:
        """Collect each constraint's ``report()``; returns ``None`` when empty."""
        v = [c.report() for c in self.constraints.values()]
        if len(v) > 0:
            return v
        return None


class SummaryConstraints:
    """Collection of summary constraints keyed by constraint name.

    ``self.constraints`` is a mapping from constraint name to constraint.
    """

    def __init__(self, constraints: Optional[Mapping[str, "SummaryConstraint"]] = None):
        """Store ``constraints`` as a name-keyed mapping.

        Args:
            constraints: mapping of name to constraint, a list of constraints
                (re-keyed by each constraint's ``name``), or ``None`` for an
                empty collection.
        """
        if constraints is None:
            constraints = dict()

        # Support list of constraints for back compat with previous version.
        if isinstance(constraints, list):
            self.constraints = {constraint.name: constraint for constraint in constraints}
        else:
            self.constraints = constraints

    @staticmethod
    def from_protobuf(msg: "SummaryConstraintMsgs") -> Optional["SummaryConstraints"]:
        """Deserialize ``msg.constraints``; returns ``None`` when the message holds none."""
        constraints = [SummaryConstraint.from_protobuf(c) for c in msg.constraints]
        if len(constraints) > 0:
            return SummaryConstraints({v.name: v for v in constraints})
        return None

    def __getitem__(self, name: str) -> Optional["SummaryConstraint"]:
        """Return the constraint registered under ``name``, or ``None`` if absent."""
        # The attribute is spelled `constraints`; a misspelling here raises
        # AttributeError on every lookup.
        if self.constraints:
            return self.constraints.get(name)
        return None

    def to_protobuf(self) -> Optional["SummaryConstraintMsgs"]:
        """Serialize every constraint; returns ``None`` when the collection is empty."""
        v = [c.to_protobuf() for c in self.constraints.values()]
        if len(v) > 0:
            scmsg = SummaryConstraintMsgs()
            scmsg.constraints.extend(v)
            return scmsg
        return None

    def update(self, v):
        """Forward ``v`` to the ``update`` method of every constraint."""
        for c in self.constraints.values():
            c.update(v)

    def merge(self, other) -> "SummaryConstraints":
        """Merge with ``other`` and return the combined collection.

        Returns ``self`` when ``other`` is falsy or has no constraints.
        Otherwise the result starts from a copy of ``other``'s mapping, and
        every constraint of ``self`` is merged with the same-named entry of
        ``other`` (or with ``None`` when ``other`` has no such entry).
        """
        if not other or not other.constraints:
            return self

        merged_constraints = dict(other.constraints)
        # Iterate (name, constraint) pairs; iterating the mapping itself
        # yields only the keys and breaks the tuple unpacking.
        for name, constraint in self.constraints.items():
            merged_constraints[name] = constraint.merge(other.constraints.get(name))

        return SummaryConstraints(merged_constraints)

    def report(self) -> Optional[List[tuple]]:
        """Collect each constraint's ``report()``; returns ``None`` when empty."""
        v = [c.report() for c in self.constraints.values()]
        if len(v) > 0:
            return v
        return None
Expand All @@ -314,8 +386,8 @@ class DatasetConstraints:
def __init__(
self,
props: DatasetProperties,
value_constraints: Optional[Mapping[str, ValueConstraints]] = None,
summary_constraints: Optional[Mapping[str, SummaryConstraints]] = None,
value_constraints: Optional[ValueConstraints] = None,
summary_constraints: Optional[SummaryConstraints] = None,
):
self.dataset_properties = props
# repackage lists of constraints if necessary
Expand Down
43 changes: 42 additions & 1 deletion tests/unit/core/statistics/test_constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ def test_value_constraints_pattern_match(df_lending_club, local_config_path):


def test_summary_constraints(df_lending_club, local_config_path):

non_negative = SummaryConstraint("min", Op.GE, 0)

dc = DatasetConstraints(None, summary_constraints={"annual_inc": [non_negative]})
Expand All @@ -122,3 +121,45 @@ def test_summary_constraints(df_lending_club, local_config_path):
for each_feat in report:
for each_constraint in each_feat[1]:
assert each_constraint[1] == 1


def test_value_constraints_no_merge_different_names():
    """Merging value constraints whose names differ must raise AssertionError."""
    lhs, rhs = ValueConstraint(Op.LT, 1, name="c1"), ValueConstraint(Op.LT, 1, name="c2")
    with pytest.raises(AssertionError):
        lhs.merge(rhs)


def test_value_constraints_no_merge_different_values():
    """Merging value constraints built with different values must raise AssertionError."""
    lhs, rhs = ValueConstraint(Op.LT, 1), ValueConstraint(Op.LT, 2)
    with pytest.raises(AssertionError):
        lhs.merge(rhs)


def test_summary_constraints_no_merge_different_names():
    """Merging summary constraints whose names differ must raise AssertionError."""
    lhs = SummaryConstraint("min", Op.GE, 0, name="non-negative")
    rhs = SummaryConstraint("min", Op.GE, 0, name="positive-number")
    with pytest.raises(AssertionError):
        lhs.merge(rhs)


def test_summary_constraints_no_merge_different_values():
    """Merging same-named summary constraints with different values must raise AssertionError."""
    lhs = SummaryConstraint("min", Op.GE, 1, name="GreaterThanThreshold")
    rhs = SummaryConstraint("min", Op.GE, 2, name="GreaterThanThreshold")
    with pytest.raises(AssertionError):
        lhs.merge(rhs)


def test_value_constraints_merge():
    """Merging two identical constraints that saw no data reports zero counts."""
    lhs, rhs = ValueConstraint(Op.LT, 1), ValueConstraint(Op.LT, 1)
    combined = lhs.merge(rhs)
    assert combined.report() == ("value LT 1", 0, 0), "merging unlogged constraints should not change them from initiat state"


def test_value_constraints_merge_empty():
    """Merging with ``None`` hands back the left-hand constraint."""
    lhs = ValueConstraint(Op.LT, 1)
    rhs = None
    combined = lhs.merge(rhs)
    assert combined == lhs, "merging empty constraints should preserve left hand side"

0 comments on commit cedaa7c

Please sign in to comment.