Skip to content

Commit

Permalink
Merge 083f15f into 7d5991a
Browse files Browse the repository at this point in the history
  • Loading branch information
MilenaTrajanoska committed Dec 16, 2021
2 parents 7d5991a + 083f15f commit 815a1f6
Show file tree
Hide file tree
Showing 4 changed files with 409 additions and 14 deletions.
6 changes: 3 additions & 3 deletions proto/src/constraints.proto
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ message SummaryConstraintMsg {
oneof second {
string second_field = 3;
double value = 4;
SummaryBetweenConstraintMsg between = 5;
SummaryBetweenConstraintMsg between = 7;
}
Op op = 6;
bool verbose = 7;
Op op = 5;
bool verbose = 6;
}

message SummaryBetweenConstraintMsg {
Expand Down
5 changes: 3 additions & 2 deletions src/whylogs/core/columnprofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ def track(self, value, character_list=None, token_method=None):
if isinstance(value, str):
self.string_tracker.update(value, character_list=character_list, token_method=token_method)
# TODO: Implement real typed data conversion

self.constraints.update(value)

typed_data = TypedDataConverter.convert(value)

if not TypedDataConverter._are_nulls(typed_data):
Expand All @@ -130,8 +133,6 @@ def track(self, value, character_list=None, token_method=None):

self.number_tracker.track(typed_data)

self.constraints.update(typed_data)

def _unique_count_summary(self) -> UniqueCountSummary:
cardinality_summary = self.cardinality_tracker.to_summary(_UNIQUE_COUNT_BOUNDS_STD)
if cardinality_summary:
Expand Down
94 changes: 90 additions & 4 deletions src/whylogs/core/statistics/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def __init__(self, op: Op, value=None, regex_pattern: str = None, name: str = No

@property
def name(self):
if getattr(self, "value", None):
if getattr(self, "value", None) is not None:
return self._name if self._name is not None else f"value {Op.Name(self.op)} {self.value}"
else:
return self._name if self._name is not None else f"value {Op.Name(self.op)} {self.regex_pattern}"
Expand All @@ -120,17 +120,32 @@ def update(self, v) -> bool:
def merge(self, other) -> "ValueConstraint":
if not other:
return self
val = None
pattern = None
assert self.name == other.name, f"Cannot merge constraints with different names: ({self.name}) and ({other.name})"
assert self.op == other.op, f"Cannot merge constraints with different ops: {self.op} and {other.op}"
assert self.value == other.value, f"Cannot merge value constraints with different values: {self.value} and {other.value}"
merged_value_constraint = ValueConstraint(op=self.op, value=self.value, name=self.name, verbose=self._verbose)
if all([getattr(v, "value", None) is not None for v in (self, other)]):
val = self.value
assert self.value == other.value, f"Cannot merge value constraints with different values: {self.value} and {other.value}"
elif all([getattr(v, "regex_pattern", None) for v in (self, other)]):
pattern = self.regex_pattern
assert (
self.regex_pattern == other.regex_pattern
), f"Cannot merge value constraints with different values: {self.regex_pattern} and {other.regex_pattern}"
else:
raise TypeError("Cannot merge a numeric value constraint with a string value constraint")

merged_value_constraint = ValueConstraint(op=self.op, value=val, regex_pattern=pattern, name=self.name, verbose=self._verbose)
merged_value_constraint.total = self.total + other.total
merged_value_constraint.failures = self.failures + other.failures
return merged_value_constraint

@staticmethod
def from_protobuf(msg: ValueConstraintMsg) -> "ValueConstraint":
return ValueConstraint(msg.op, msg.value, name=msg.name, verbose=msg.verbose)
if msg.regex_pattern != "":
return ValueConstraint(msg.op, regex_pattern=msg.regex_pattern, name=msg.name, verbose=msg.verbose)
else:
return ValueConstraint(msg.op, msg.value, name=msg.name, verbose=msg.verbose)

def to_protobuf(self) -> ValueConstraintMsg:
if hasattr(self, "value"):
Expand Down Expand Up @@ -568,3 +583,74 @@ def maxBetweenConstraint(lower_value=None, upper_value=None, lower_field=None, u

def maxLessThanEqualConstraint(value=None, field=None, verbose=False):
return SummaryConstraint("max", Op.LE, value=value, second_field=field, verbose=verbose)


def containsEmailConstraint(regex_pattern: "str" = None, verbose=False):
    """Build a ``ValueConstraint`` with ``Op.MATCH`` and an email-address regex.

    :param regex_pattern: optional custom regex used instead of the built-in
        email pattern; a warning is logged when it is supplied.
    :param verbose: forwarded unchanged to ``ValueConstraint``.
    :return: ``ValueConstraint(Op.MATCH, regex_pattern=..., verbose=verbose)``.
    """
    if regex_pattern is not None:
        logger.warning("Warning: supplying your own regex pattern might cause slower evaluation of the containsEmailConstraint, depending on its complexity.")
        email_pattern = regex_pattern
    else:
        # The global inline flag (?i) has to be the very first token of the
        # pattern: placing it after "^" is deprecated since Python 3.6 and is
        # rejected with re.error ("global flags not at the start of the
        # expression") from Python 3.11 on. The matching semantics are the same.
        email_pattern = (
            r"(?i)^(?:[a-z0-9!#$%&\'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&\'*+/=?^_`{|}~-]+)*"
            r'|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|[\x01-\x09\x0b\x0c\x0e-\x7f])*")'
            r"@"
            r"(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)$"
        )

    return ValueConstraint(Op.MATCH, regex_pattern=email_pattern, verbose=verbose)


def containsCreditCardConstraint(regex_pattern: "str" = None, verbose=False):
    """Build a ``ValueConstraint`` with ``Op.MATCH`` and a credit-card-number regex.

    :param regex_pattern: optional custom regex used instead of the built-in
        pattern; a warning is logged when it is supplied.
    :param verbose: forwarded unchanged to ``ValueConstraint``.
    :return: ``ValueConstraint(Op.MATCH, regex_pattern=..., verbose=verbose)``.
    """
    if regex_pattern is None:
        # Built-in pattern: one alternative per card-number layout, with
        # optional whitespace or "-" separators between digit groups.
        # NOTE(review): in the last alternative the trailing digit groups bind
        # only to the "35.." branch, so the bare strings "2131" and "1800"
        # match on their own -- confirm whether that is intended.
        default_pattern = (
            r"^(?:(4[0-9]{3}([\s-]?[0-9]{4}){2}[\s-]?[0-9]{1,4})"
            r"|(?:(5[1-5][0-9]{2}([\s-]?[0-9]{4}){3}))"
            r"|(?:(6(?:011|5[0-9]{2})([\s-]?[0-9]{4}){3}))"
            r"|(?:(3[47][0-9]{2}[\s-]?[0-9]{6}[\s-]?[0-9]{5}))"
            r"|(?:(3(?:0[0-5]|[68][0-9])[0-9][\s-]?[0-9]{6}[\s-]?[0-9]{4}))"
            r"|(?:2131|1800|35[0-9]{2,3}([\s-]?[0-9]{4}){3}))$"
        )
        return ValueConstraint(Op.MATCH, regex_pattern=default_pattern, verbose=verbose)

    # Caller supplied a pattern: warn about possible slowdown, then use it as-is.
    logger.warning(
        "Warning: supplying your own regex pattern might cause slower evaluation of the containsCreditCardConstraint, depending on its complexity."
    )
    return ValueConstraint(Op.MATCH, regex_pattern=regex_pattern, verbose=verbose)


def containsSSNConstraint(regex_pattern: "str" = None, verbose=False):
    """Build a ``ValueConstraint`` with ``Op.MATCH`` and an SSN regex.

    :param regex_pattern: optional custom regex used instead of the built-in
        pattern; a warning is logged when it is supplied.
    :param verbose: forwarded unchanged to ``ValueConstraint``.
    :return: ``ValueConstraint(Op.MATCH, regex_pattern=..., verbose=verbose)``.
    """
    if regex_pattern is None:
        # Built-in pattern: 3-2-4 digits with optional whitespace or "-"
        # separators; the lookaheads reject an area of 000/666/9xx, a group
        # of 00 and a serial of 0000.
        default_pattern = r"^(?!000|666|9[0-9]{2})[0-9]{3}[\s-]?(?!00)[0-9]{2}[\s-]?(?!0000)[0-9]{4}$"
        return ValueConstraint(Op.MATCH, regex_pattern=default_pattern, verbose=verbose)

    # Caller supplied a pattern: warn about possible slowdown, then use it as-is.
    logger.warning("Warning: supplying your own regex pattern might cause slower evaluation of the containsSSNConstraint, depending on its complexity.")
    return ValueConstraint(Op.MATCH, regex_pattern=regex_pattern, verbose=verbose)


def containsURLConstraint(regex_pattern: "str" = None, verbose=False):
    """Build a ``ValueConstraint`` with ``Op.MATCH`` and a URL regex.

    :param regex_pattern: optional custom regex used instead of the built-in
        pattern; a warning is logged when it is supplied.
    :param verbose: forwarded unchanged to ``ValueConstraint``.
    :return: ``ValueConstraint(Op.MATCH, regex_pattern=..., verbose=verbose)``.
    """
    if regex_pattern is not None:
        logger.warning("Warning: supplying your own regex pattern might cause slower evaluation of the containsURLConstraint, depending on its complexity.")
        url_pattern = regex_pattern
    else:
        # The first-label character class uses "A-Z". The previous "A-z" range
        # spans ASCII 0x41-0x7A and therefore also admitted the punctuation
        # characters [ \ ] ^ _ and the backtick, which was a typo.
        url_pattern = (
            r"^(?:http(s)?:\/\/)?((www)|(?:[a-zA-Z0-9-]+)\.)"
            r"(?:[-a-zA-Z0-9@:%._\+~#=]{1,256}\."
            r"(?:[a-zA-Z0-9]{1,6})\b"
            r"(?:[-a-zA-Z0-9@:%_\+.~#?&//=]*))$"
        )

    return ValueConstraint(Op.MATCH, regex_pattern=url_pattern, verbose=verbose)


def stringLengthEqualConstraint(length: int, verbose=False):
    """Build a ``ValueConstraint`` with ``Op.MATCH`` and the regex ``^.{length}$``.

    :param length: repetition count interpolated into the regex quantifier.
    :param verbose: forwarded unchanged to ``ValueConstraint``.
    :return: ``ValueConstraint(Op.MATCH, regex_pattern=..., verbose=verbose)``.
    """
    # Build the "{n}" quantifier separately so the brace escaping stays readable.
    quantifier = f"{{{length}}}"
    exact_length_regex = f"^.{quantifier}$"
    return ValueConstraint(op=Op.MATCH, regex_pattern=exact_length_regex, verbose=verbose)


def stringLengthBetweenConstraint(lower_value: int, upper_value: int, verbose=False):
    """Build a ``ValueConstraint`` with ``Op.MATCH`` and the regex ``^.{lower,upper}$``.

    :param lower_value: lower bound interpolated into the regex quantifier.
    :param upper_value: upper bound interpolated into the regex quantifier.
    :param verbose: forwarded unchanged to ``ValueConstraint``.
    :return: ``ValueConstraint(Op.MATCH, regex_pattern=..., verbose=verbose)``.
    """
    # Build the "{m,n}" quantifier separately so the brace escaping stays readable.
    quantifier = f"{{{lower_value},{upper_value}}}"
    bounded_length_regex = f"^.{quantifier}$"
    return ValueConstraint(op=Op.MATCH, regex_pattern=bounded_length_regex, verbose=verbose)
Loading

0 comments on commit 815a1f6

Please sign in to comment.