diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 2f58e0a775..cef94096b1 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.3-dev5 +current_version = 0.4.1-dev0 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? diff --git a/docs/conf.py b/docs/conf.py index 55a335ae22..bba7e90bf7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -101,7 +101,7 @@ # built documents. # # The short X.Y version. -version = "0.3.3-dev5" +version = "0.4.1-dev0" # The full version, including alpha/beta/rc tags. release = "" # Is set by calling `setup.py docs` diff --git a/proto b/proto index b3de84139e..083464b1e5 160000 --- a/proto +++ b/proto @@ -1 +1 @@ -Subproject commit b3de84139e5fbde1196c71797a62023153bbf8a0 +Subproject commit 083464b1e5fdc200b3118e8621b0e99346d045f9 diff --git a/requirements-dev.txt b/requirements-dev.txt index 725e66033a..2fd48b1427 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -19,6 +19,7 @@ colorama==0.4.4 coverage==5.3 cryptography==3.3.2 cycler==0.10.0 +scikit-learn==0.24.1 databricks-cli==0.14.1 decorator==4.4.2 distlib==0.3.1 diff --git a/setup.cfg b/setup.cfg index b5e23de22f..25b1fb2703 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,7 +4,7 @@ [metadata] name = whylogs -version = 0.3.3-dev5 +version = 0.4.1-dev0 description = Profile and monitor your ML data pipeline end-to-end author = WhyLabs.ai author-email = support@whylabs.ai diff --git a/src/whylogs/_version.py b/src/whylogs/_version.py index 3b75ad6dad..74c21bd0c9 100644 --- a/src/whylogs/_version.py +++ b/src/whylogs/_version.py @@ -1,3 +1,3 @@ """WhyLabs version number.""" -__version__ = "0.3.3-dev5" +__version__ = "0.4.1-dev0" diff --git a/src/whylogs/app/logger.py b/src/whylogs/app/logger.py index ebe5d280e9..2fd1b415b9 100644 --- a/src/whylogs/app/logger.py +++ b/src/whylogs/app/logger.py @@ -17,7 +17,7 @@ from whylogs.core import DatasetProfile, TrackImage, METADATA_DEFAULT_ATTRIBUTES, TrackBB from whylogs.core.statistics.constraints import DatasetConstraints from whylogs.io import LocalDataset - +from whylogs.proto import ModelType # TODO upgrade to Classes SegmentTag = Dict[str, any] @@ -124,9 +124,8 @@ def segmented_profiles(self, ) -> Dict[str, DatasetProfile]: def get_segment(self, segment: Segment) -> Optional[DatasetProfile]: hashed_seg = hash_segment(segment) - segment_profile = self._profiles[-1]["segmented_profiles"].get( + return self._profiles[-1]["segmented_profiles"].get( hashed_seg, None) - return segment_profile def set_segments(self, segments: Union[List[Segment], List[str]]) -> None: if segments: @@ -186,8 +185,7 @@ def _set_rotation(self, with_rotation_time: str = None): self.rotate_at = self.rotate_when(current_time) def rotate_when(self, time): - result = time + self.interval - return result + return time + self.interval def should_rotate(self, ): @@ -195,9 +193,7 @@ def should_rotate(self, ): return False current_time = int(datetime.datetime.utcnow().timestamp()) - if current_time >= self.rotate_at: - return True - return False + return current_time >= self.rotate_at def _rotate_time(self): """ @@ -212,7 +208,7 @@ def _rotate_time(self): time_tuple.strftime(self.suffix), self.suffix) # modify the segment datetime stamps - if (self.segments is None) or ((self.segments is not None) and self.profile_full_dataset): + if self.segments is None or self.profile_full_dataset: self._profiles[-1]["full_profile"].dataset_timestamp = log_datetime if self.segments is not None: for _, each_prof in 
self._profiles[-1]["segmented_profiles"].items(): @@ -255,11 +251,10 @@ def flush(self, rotation_suffix: str = None): for hashseg, each_seg_prof in self._profiles[-1]["segmented_profiles"].items(): seg_suffix = hashseg full_suffix = "_" + seg_suffix - if rotation_suffix is None: - writer.write(each_seg_prof, full_suffix) - else: + if rotation_suffix is not None: full_suffix += rotation_suffix - writer.write(each_seg_prof, full_suffix) + + writer.write(each_seg_prof, full_suffix) def full_profile_check(self, ) -> bool: """ @@ -342,13 +337,19 @@ def log_segment_datum(self, feature_name, value): def log_metrics(self, targets, predictions, - scores=None, target_field=None, prediction_field=None, + scores=None, + model_type: ModelType = None, + target_field=None, + prediction_field=None, score_field=None): self._profiles[-1]["full_profile"].track_metrics( - targets, predictions, scores, target_field=target_field, + targets, predictions, scores, + model_type=model_type, + target_field=target_field, prediction_field=prediction_field, - score_field=score_field) + score_field=score_field, + ) def log_image(self, image, @@ -413,7 +414,7 @@ def log_local_dataset(self, root_dir, folder_feature_name="folder_feature", imag if isinstance(data, pd.DataFrame): self.log_dataframe(data) - elif isinstance(data, Dict) or isinstance(data, list): + elif isinstance(data, (Dict, list)): self.log_annotation(annotation_data=data) elif isinstance(data, ImageType): if image_feature_transforms: @@ -508,10 +509,11 @@ def log_segments_keys(self, data): for each_segment in segments: try: segment_df = grouped_data.get_group(each_segment) - segment_tags = [] - for i in range(len(self.segments)): - segment_tags.append( - {"key": self.segments[i], "value": each_segment[i]}) + segment_tags = [ + {"key": self.segments[i], "value": each_segment[i]} + for i in range(len(self.segments)) + ] + self.log_df_segment(segment_df, segment_tags) except KeyError: continue @@ -522,7 +524,7 @@ def log_fixed_segments(self, data): for segment_tag in self.segments: # create keys segment_keys = [feature["key"] for feature in segment_tag] - seg = tuple([feature["value"] for feature in segment_tag]) + seg = tuple(feature["value"] for feature in segment_tag) grouped_data = data.groupby(segment_keys) diff --git a/src/whylogs/core/datasetprofile.py b/src/whylogs/core/datasetprofile.py index d032307e51..2e596deb41 100644 --- a/src/whylogs/core/datasetprofile.py +++ b/src/whylogs/core/datasetprofile.py @@ -24,6 +24,7 @@ DatasetProperties, DatasetSummary, MessageSegment, + ModelType ) from whylogs.core.statistics.constraints import DatasetConstraints, SummaryConstraints from whylogs.util import time @@ -131,9 +132,9 @@ def __init__( if columns is None: columns = {} if tags is None: - tags = dict() + tags = {} if metadata is None: - metadata = dict() + metadata = {} if session_id is None: session_id = uuid4().hex @@ -189,9 +190,14 @@ def add_output_field(self, field: Union[str, List[str]]): else: self.model_profile.add_output_field(field) - def track_metrics(self, targets: List[Union[str, bool, float, int]], predictions: List[Union[str, bool, float, int]], scores: List[float] = None, - target_field: str = None, prediction_field: str = None, - score_field: str = None): + def track_metrics(self, + targets: List[Union[str, bool, float, int]], + predictions: List[Union[str, bool, float, int]], + scores: List[float] = None, + model_type: ModelType = None, + target_field: str = None, + prediction_field: str = None, + score_field: str = None, ): """ Function to 
track metrics based on validation data. @@ -206,6 +212,14 @@ def track_metrics(self, targets: List[Union[str, bool, float, int]], predictions inferred/predicted values scores : List[float], optional assocaited scores for each inferred, all values set to 1 if not passed + target_field : str, optional + name of the target column + prediction_field : str, optional + name of the prediction column + score_field : str, optional + name of the score column + model_type : ModelType, optional + Default is CLASSIFICATION model type. target_field : str, optional prediction_field : str, optional score_field : str, optional @@ -214,8 +228,8 @@ def track_metrics(self, targets: List[Union[str, bool, float, int]], predictions """ if self.model_profile is None: self.model_profile = ModelProfile() - self.model_profile.compute_metrics(predictions, targets, - scores, target_field=target_field, + self.model_profile.compute_metrics(predictions=predictions, targets=targets, + scores=scores, model_type=model_type, target_field=target_field, prediction_field=prediction_field, - score_field=score_field) + score_field=score_field, + ) @@ -350,7 +364,8 @@ def generate_constraints(self) -> DatasetConstraints: Protobuf constraints message. """ self.validate() - constraints = [(name, col.generate_constraints()) for name, col in self.columns.items()] + constraints = [(name, col.generate_constraints()) + for name, col in self.columns.items()] # filter empty constraints constraints = [(n, c) for n, c in constraints if c is not None] return DatasetConstraints(self.to_properties(), None, dict(constraints)) @@ -826,10 +841,8 @@ def flatten_dataset_frequent_strings(dataset_summary: DatasetSummary): try: item_summary = getter( getter(col, "string_summary"), "frequent").items - items = {} - for item in item_summary: - items[item.value] = int(item.estimate) - if len(items) > 0: + items = {item.value: int(item.estimate) for item in item_summary} + if items: frequent_strings[col_name] = items except KeyError: continue diff --git a/src/whylogs/core/metrics/model_metrics.py b/src/whylogs/core/metrics/model_metrics.py index 8e8f416adb..558865af23 100644 --- a/src/whylogs/core/metrics/model_metrics.py +++ b/src/whylogs/core/metrics/model_metrics.py @@ -1,7 +1,8 @@ from typing import List, Union from whylogs.core.metrics.confusion_matrix import ConfusionMatrix -from whylogs.proto import ModelMetricsMessage +from whylogs.core.metrics.regression_metrics import RegressionMetrics +from whylogs.proto import ModelMetricsMessage, ModelType class ModelMetrics: @@ -10,22 +11,45 @@ class ModelMetrics: Attributes: confusion_matrix (ConfusionMatrix): ConfusionMatrix which keeps it track of counts with NumberTracker + regression_metrics (RegressionMetrics): RegressionMetrics which keeps track of common regression metrics in case the targets are continuous.
""" - def __init__(self, confusion_matrix: ConfusionMatrix = None): - if confusion_matrix is None: - confusion_matrix = ConfusionMatrix() + def __init__(self, confusion_matrix: ConfusionMatrix = None, + regression_metrics: RegressionMetrics = None, + model_type: ModelType = ModelType.UNKNOWN): + + self.model_type = model_type + if confusion_matrix is not None and regression_metrics is not None: + raise NotImplementedError("Regression Metrics together with Confusion Matrix not implemented yet") + + if confusion_matrix is not None: + if (self.model_type == ModelType.REGRESSION): + raise NotImplementedError("Incorrent model type") + self.model_type = ModelType.CLASSIFICATION + self.confusion_matrix = confusion_matrix + if regression_metrics is not None: + if (self.model_type == ModelType.CLASSIFICATION): + raise NotImplementedError("Incorrent model type") + self.model_type = ModelType.REGRESSION + self.regression_metrics = regression_metrics + def to_protobuf(self, ) -> ModelMetricsMessage: - return ModelMetricsMessage(scoreMatrix=self.confusion_matrix.to_protobuf() if self.confusion_matrix else None) + return ModelMetricsMessage( + scoreMatrix=self.confusion_matrix.to_protobuf() if self.confusion_matrix else None, + regressionMetrics=self.regression_metrics.to_protobuf() if self.regression_metrics else None, + modelType=self.model_type) @classmethod def from_protobuf(cls, message, ): - return ModelMetrics(confusion_matrix=ConfusionMatrix.from_protobuf(message.scoreMatrix)) + return ModelMetrics( + confusion_matrix=ConfusionMatrix.from_protobuf(message.scoreMatrix), + regression_metrics=RegressionMetrics.from_protobuf(message.regressionMetrics), + model_type=message.modelType) - def compute_confusion_matrix(self, predictions: List[Union[str, int, bool]], - targets: List[Union[str, int, bool]], + def compute_confusion_matrix(self, predictions: List[Union[str, int, bool, float]], + targets: List[Union[str, int, bool, float]], scores: List[float] = None, target_field: str = None, prediction_field: str = None, @@ -48,12 +72,23 @@ def compute_confusion_matrix(self, predictions: List[Union[str, int, bool]], score_field=score_field) confusion_matrix.add(predictions, targets, scores) - if self.confusion_matrix.labels is None or self.confusion_matrix.labels == []: + if self.confusion_matrix is None or self.confusion_matrix.labels is None or self.confusion_matrix.labels == []: self.confusion_matrix = confusion_matrix else: self.confusion_matrix = self.confusion_matrix.merge( confusion_matrix) + def compute_regression_metrics(self, predictions: List[Union[float, int]], + targets: List[Union[float, int]], + target_field: str = None, + prediction_field: str = None): + regression_metrics = RegressionMetrics(target_field=target_field, prediction_field=prediction_field) + regression_metrics.add(predictions, targets) + if self.regression_metrics: + self.regression_metrics = self.regression_metrics.merge(regression_metrics) + else: + self.regression_metrics = regression_metrics + def merge(self, other): """ @@ -66,4 +101,15 @@ def merge(self, other): return self if self.confusion_matrix is None: return other - return ModelMetrics(confusion_matrix=self.confusion_matrix.merge(other.confusion_matrix)) + + if self.model_type is None or other.model_type is None: + model_type = ModelType.UNKNOWN + elif other.model_type != self.model_type: + model_type = ModelType.UNKNOWN + else: + model_type = self.model_type + + return ModelMetrics( + confusion_matrix=self.confusion_matrix.merge(other.confusion_matrix), + 
regression_metrics=self.regression_metrics.merge(other.regression_metrics), + model_type=model_type) diff --git a/src/whylogs/core/metrics/regression_metrics.py b/src/whylogs/core/metrics/regression_metrics.py new file mode 100644 index 0000000000..95a98c199c --- /dev/null +++ b/src/whylogs/core/metrics/regression_metrics.py @@ -0,0 +1,124 @@ +import math +from typing import List + +from sklearn.utils.multiclass import type_of_target + +from whylogs.proto import RegressionMetricsMessage + +SUPPORTED_TYPES = ("regression",) + + +class RegressionMetrics: + + def __init__(self, + prediction_field: str = None, + target_field: str = None): + self.prediction_field = prediction_field + self.target_field = target_field + + self.count = 0 + self.sum_abs_diff = 0.0 + self.sum_diff = 0.0 + self.sum2_diff = 0.0 + # to add later + # self.nt_diff = whylogs.core.statistics.NumberTracker() + + def add(self, predictions: List[float], + targets: List[float]): + """ + Add predictions and targets to the computation of regression metrics. + + Args: + predictions (List[float]): predicted values + targets (List[float]): target (actual) values + + Raises: + NotImplementedError: in case targets do not fall into continuous support + ValueError: in case of missing targets or predictions + """ + tgt_type = type_of_target(targets) + if tgt_type not in ("continuous",): + raise NotImplementedError(f"target type: {tgt_type} not supported for these metrics") + + # need to vectorize this + for idx, target in enumerate(targets): + + self.sum_abs_diff += abs(predictions[idx] - target) + self.sum_diff += predictions[idx] - target + self.sum2_diff += (predictions[idx] - target)**2 + # To add later + # self.nt_diff.track(predictions[idx] - target) + self.count += 1 + + def mean_absolute_error(self): + if self.count == 0: + return None + return self.sum_abs_diff / self.count + + def mean_squared_error(self): + if self.count == 0: + return None + return self.sum2_diff / self.count + + def root_mean_squared_error(self): + if self.count == 0: + return None + return math.sqrt(self.sum2_diff / self.count) + + def merge(self, other): + """ + Merge two separate sets of regression metrics.
+ + Args: + other : regression metrics to merge with self + Returns: + RegressionMetrics: merged regression metrics + """ + + if self.count == 0: + return other + if other.count == 0: + return self + + if self.prediction_field != other.prediction_field: + raise ValueError("prediction fields differ") + if self.target_field != other.target_field: + raise ValueError("target fields differ") + + new_reg = RegressionMetrics(prediction_field=self.prediction_field, + target_field=self.target_field) + new_reg.count = self.count + other.count + new_reg.sum_abs_diff = self.sum_abs_diff + other.sum_abs_diff + new_reg.sum_diff = self.sum_diff + other.sum_diff + new_reg.sum2_diff = self.sum2_diff + other.sum2_diff + + return new_reg + + def to_protobuf(self, ): + """ + Convert to protobuf + + Returns: + TYPE: Protobuf Message + """ + + return RegressionMetricsMessage( + prediction_field=self.prediction_field, + target_field=self.target_field, + count=self.count, + sum_abs_diff=self.sum_abs_diff, + sum_diff=self.sum_diff, + sum2_diff=self.sum2_diff) + + @classmethod + def from_protobuf(cls, message: RegressionMetricsMessage, ): + if message.ByteSize() == 0: + return None + + reg_met = RegressionMetrics() + reg_met.count = message.count + reg_met.sum_abs_diff = message.sum_abs_diff + reg_met.sum_diff = message.sum_diff + reg_met.sum2_diff = message.sum2_diff + + return reg_met diff --git a/src/whylogs/core/model_profile.py b/src/whylogs/core/model_profile.py index 45bae7d64d..1d59d97e8b 100644 --- a/src/whylogs/core/model_profile.py +++ b/src/whylogs/core/model_profile.py @@ -1,7 +1,7 @@ from sklearn.utils.multiclass import type_of_target import numpy as np -from whylogs.proto import ModelProfileMessage +from whylogs.proto import ModelProfileMessage, ModelType from whylogs.core.metrics.model_metrics import ModelMetrics SUPPORTED_TYPES = ("binary", "multiclass") @@ -13,10 +13,12 @@ class ModelProfile: Attributes ---------- - output_fields : list - list of fields that map to model outputs metrics : ModelMetrics the model metrics object + model_type : ModelType + Type of model: CLASSIFICATION, REGRESSION, UNKNOWN, etc. + output_fields : list + list of fields that map to model outputs """ def __init__(self, @@ -38,6 +40,7 @@ def add_output_field(self, field: str): def compute_metrics(self, targets, predictions, scores=None, + model_type: ModelType = None, target_field=None, prediction_field=None, score_field=None @@ -48,14 +51,14 @@ def compute_metrics(self, targets, Parameters ---------- targets : List - targets (or actuals) for validation + targets (or actuals) for validation; if these are floats, the model is assumed to be a regression model predictions : List predictions (or inferred values) scores : List, optional - associated scores for each prediction + associated scores for each prediction (for binary and multiclass problems) target_field : str, optional prediction_field : str, optional - score_field : str, optional + score_field : str, optional (for binary and multiclass problems) Raises @@ -64,41 +67,50 @@ def compute_metrics(self, targets, """ tgt_type = type_of_target(targets) - if tgt_type not in ("binary", "multiclass"): - raise NotImplementedError("target type not supported yet") - # if score are not present set them to 1.
- if scores is None: - scores = np.ones(len(targets)) - - scores = np.array(scores) - - # compute confusion_matrix - self.metrics.compute_confusion_matrix(predictions=predictions, - targets=targets, - scores=scores, - target_field=target_field, - prediction_field=prediction_field, - score_field=score_field) + if tgt_type in ("continuous",) or model_type == ModelType.REGRESSION: + + self.metrics.compute_regression_metrics(predictions=predictions, + targets=targets, + target_field=target_field, + prediction_field=prediction_field) + self.metrics.model_type = ModelType.REGRESSION + + elif tgt_type in ("binary", "multiclass") or model_type == ModelType.CLASSIFICATION: + self.metrics.model_type = ModelType.CLASSIFICATION + + # if scores are not present, set them to 1. + if scores is None: + scores = np.ones(len(targets)) + + scores = np.array(scores) + + # compute confusion_matrix + self.metrics.compute_confusion_matrix(predictions=predictions, + targets=targets, + scores=scores, + target_field=target_field, + prediction_field=prediction_field, + score_field=score_field) + else: + raise NotImplementedError(f"target type {tgt_type} not supported yet") def to_protobuf(self): return ModelProfileMessage(output_fields=self.output_fields, - metrics=self.metrics.to_protobuf(), - ) + metrics=self.metrics.to_protobuf()) @classmethod def from_protobuf(cls, message: ModelProfileMessage): # convert google.protobuf.pyext._message.RepeatedScalarContainer to a list - output_fields = [] - for f in message.output_fields: - output_fields.append(f) - + output_fields = list(message.output_fields) return ModelProfile(output_fields=output_fields, metrics=ModelMetrics.from_protobuf(message.metrics)) def merge(self, model_profile): if model_profile is None: return self + output_fields = list( set(self.output_fields + model_profile.output_fields)) metrics = self.metrics.merge(model_profile.metrics) + return ModelProfile(output_fields=output_fields, metrics=metrics) diff --git a/testdata/metrics/2021-02-12.parquet b/testdata/metrics/2021-02-12.parquet new file mode 100644 index 0000000000..7810aff3e3 Binary files /dev/null and b/testdata/metrics/2021-02-12.parquet differ diff --git a/testdata/metrics/2021-02-13.parquet b/testdata/metrics/2021-02-13.parquet new file mode 100644 index 0000000000..717e420246 Binary files /dev/null and b/testdata/metrics/2021-02-13.parquet differ diff --git a/testdata/metrics/regression_java.bin b/testdata/metrics/regression_java.bin new file mode 100644 index 0000000000..1a99694adf Binary files /dev/null and b/testdata/metrics/regression_java.bin differ diff --git a/tests/unit/core/metrics/test_model_metrics.py b/tests/unit/core/metrics/test_model_metrics.py index abe22e22bf..e3826ae148 100644 --- a/tests/unit/core/metrics/test_model_metrics.py +++ b/tests/unit/core/metrics/test_model_metrics.py @@ -1,8 +1,13 @@ +import pytest + +from whylogs.proto import ModelType from whylogs.core.metrics.model_metrics import ModelMetrics +from whylogs.core.metrics.confusion_matrix import ConfusionMatrix +from whylogs.core.metrics.regression_metrics import RegressionMetrics def tests_model_metrics(): - mod_met = ModelMetrics() + mod_met = ModelMetrics(model_type=ModelType.CLASSIFICATION) targets_1 = ["cat", "dog", "pig"] predictions_1 = ["cat", "dog", "dog"] @@ -12,7 +17,8 @@ def tests_model_metrics(): mod_met.compute_confusion_matrix(predictions_1, targets_1, scores_1) - print(mod_met.confusion_matrix.labels) + assert mod_met.model_type == ModelType.CLASSIFICATION + for idx, value in
enumerate(mod_met.confusion_matrix.labels): for jdx, value_2 in enumerate(mod_met.confusion_matrix.labels): print(idx, jdx) @@ -21,7 +27,7 @@ def tests_model_metrics_to_protobuf(): - mod_met = ModelMetrics() + mod_met = ModelMetrics(model_type=ModelType.CLASSIFICATION) targets_1 = ["cat", "dog", "pig"] predictions_1 = ["cat", "dog", "dog"] @@ -46,3 +52,12 @@ def test_merge_metrics_with_none_confusion_matrix(): other = ModelMetrics() other.confusion_matrix = None metrics.merge(other) + + + +def test_model_metrics_init(): + reg_met = RegressionMetrics() + conf_ma = ConfusionMatrix() + with pytest.raises(NotImplementedError): + ModelMetrics(confusion_matrix=conf_ma, regression_metrics=reg_met) + diff --git a/tests/unit/core/metrics/test_regression_metrics.py b/tests/unit/core/metrics/test_regression_metrics.py new file mode 100644 index 0000000000..46da5c3984 --- /dev/null +++ b/tests/unit/core/metrics/test_regression_metrics.py @@ -0,0 +1,73 @@ +import os + +import pandas as pd +import pytest + +from whylogs.core.metrics.regression_metrics import RegressionMetrics +from whylogs.proto import RegressionMetricsMessage + + +TEST_DATA_PATH = os.path.abspath(os.path.join(os.path.realpath( os.path.dirname(__file__)), os.pardir, os.pardir, os.pardir, os.pardir, "testdata")) + + +def test_init(): + regmet = RegressionMetrics() + assert regmet.count == 0 + assert regmet.sum_diff == 0.0 + assert regmet.sum2_diff == 0.0 + assert regmet.sum_abs_diff == 0.0 + + assert regmet.mean_squared_error() is None + + assert regmet.mean_absolute_error() is None + assert regmet.root_mean_squared_error() is None + + +def test_load_parquet(): + mean_absolute_error = 85.94534216005789 + mean_squared_error = 11474.89611670205 + root_mean_squared_error = 107.12094154133472 + + regmet = RegressionMetrics() + df = pd.read_parquet(os.path.join(TEST_DATA_PATH, "metrics", "2021-02-12.parquet")) + regmet.add(df["predictions"].to_list(), df["targets"].to_list()) + + assert regmet.count == len(df["predictions"].to_list()) + assert regmet.mean_squared_error() == pytest.approx(mean_squared_error, 0.01) + + assert regmet.mean_absolute_error() == pytest.approx(mean_absolute_error, 0.01) + assert regmet.root_mean_squared_error() == pytest.approx(root_mean_squared_error, 0.01) + + msg = regmet.to_protobuf() + new_regmet = RegressionMetrics.from_protobuf(msg) + assert regmet.count == new_regmet.count + assert regmet.mean_squared_error() == new_regmet.mean_squared_error() + assert regmet.root_mean_squared_error() == new_regmet.root_mean_squared_error() + assert regmet.mean_absolute_error() == new_regmet.mean_absolute_error() + + +def test_empty_protobuf_should_return_none(): + empty_message = RegressionMetricsMessage() + assert RegressionMetrics.from_protobuf(empty_message) is None + + +def test_merging(): + regmet_sum = RegressionMetrics() + + regmet = RegressionMetrics(prediction_field="predictions", target_field="targets") + df = pd.read_parquet(os.path.join(TEST_DATA_PATH, "metrics", "2021-02-12.parquet")) + regmet.add(df["predictions"].to_list(), df["targets"].to_list()) + regmet_sum.add(df["predictions"].to_list(), df["targets"].to_list()) + + regmet_2 = RegressionMetrics(prediction_field="predictions", target_field="targets") + df_2 = pd.read_parquet(os.path.join(TEST_DATA_PATH, "metrics", "2021-02-13.parquet")) + regmet_2.add(df_2["predictions"].to_list(), df_2["targets"].to_list()) + regmet_sum.add(df_2["predictions"].to_list(),
df_2["targets"].to_list()) + + merged_reg_metr = regmet.merge(regmet_2) + + assert merged_reg_metr.count == regmet_sum.count + assert merged_reg_metr.mean_squared_error() == pytest.approx(regmet_sum.mean_squared_error(), 0.001) + assert merged_reg_metr.root_mean_squared_error() == pytest.approx(regmet_sum.root_mean_squared_error(), 0.001) + assert merged_reg_metr.mean_absolute_error() == pytest.approx(regmet_sum.mean_absolute_error(), 0.001) diff --git a/tests/unit/core/test_datasetprofile_metrics.py b/tests/unit/core/test_datasetprofile_metrics.py index a90d7c9c6a..65602ac0e1 100644 --- a/tests/unit/core/test_datasetprofile_metrics.py +++ b/tests/unit/core/test_datasetprofile_metrics.py @@ -1,4 +1,6 @@ import os +import pytest + from whylogs.core import DatasetProfile from whylogs.core.model_profile import ModelProfile @@ -34,3 +36,38 @@ def test_read_java_protobuf(): assert len(confusion_M.labels) == 2 for idx, lbl in enumerate(confusion_M.labels): assert lbl == labels[idx] + + +def test_parse_from_protobuf_with_regression(): + dir_path = os.path.dirname(os.path.realpath(__file__)) + prof = DatasetProfile.read_protobuf(os.path.join( + TEST_DATA_PATH, "metrics", "regression_java.bin")) + assert prof.name == 'my-model-name' + assert prof.model_profile is not None + assert prof.model_profile.metrics is not None + confusion_M = prof.model_profile.metrics.confusion_matrix + regression_met = prof.model_profile.metrics.regression_metrics + assert regression_met is not None + # metrics + assert regression_met.count == 89 + assert regression_met.sum_abs_diff == pytest.approx(7649.1, 0.1) + assert regression_met.sum_diff == pytest.approx(522.7, 0.1) + assert regression_met.sum2_diff == pytest.approx(1021265.7, 0.1) + + +def test_track_metrics(): + import pandas as pd + mean_absolute_error = 85.94534216005789 + mean_squared_error = 11474.89611670205 + root_mean_squared_error = 107.12094154133472 + + x1 = DatasetProfile(name="test") + df = pd.read_parquet(os.path.join(os.path.join(TEST_DATA_PATH, "metrics", "2021-02-12.parquet"))) + x1.track_metrics(df["predictions"].to_list(), df["targets"].to_list()) + regression_metrics = x1.model_profile.metrics.regression_metrics + assert regression_metrics is not None + assert regression_metrics.count == len(df["predictions"].to_list()) + assert regression_metrics.mean_squared_error() == pytest.approx(mean_squared_error, 0.01) + + assert regression_metrics.mean_absolute_error() == pytest.approx(mean_absolute_error, 0.01) + assert regression_metrics.root_mean_squared_error() == pytest.approx(root_mean_squared_error, 0.01) diff --git a/tests/unit/core/test_model_profile.py b/tests/unit/core/test_model_profile.py index 3f12fa565f..27e60d7af4 100644 --- a/tests/unit/core/test_model_profile.py +++ b/tests/unit/core/test_model_profile.py @@ -6,7 +6,7 @@ def test_model_profile(): mod_prof = ModelProfile() assert mod_prof.output_fields == [] assert mod_prof.metrics is not None - assert mod_prof.metrics.confusion_matrix is not None + assert mod_prof.metrics.confusion_matrix is None message = mod_prof.to_protobuf() ModelProfile.from_protobuf(message) @@ -39,6 +39,7 @@ def test_merge_profile(): mod_prof = ModelProfile() assert mod_prof.output_fields == [] + mod_prof.add_output_field("predictions") mod_prof.compute_metrics(predictions_1, targets_1, scores_1) assert mod_prof.metrics is not None @@ -48,7 +49,7 @@ def test_merge_profile(): mod_prof_3 = mod_prof.merge(mod_prof_2) mod_prof_3.metrics.confusion_matrix - + assert mod_prof_3.output_fields == ["predictions"] def 
test_roundtrip_serialization(): original = ModelProfile(output_fields=["test"]) diff --git a/tox.ini b/tox.ini index 995e36d243..3fd08c643b 100644 --- a/tox.ini +++ b/tox.ini @@ -3,7 +3,7 @@ # THIS SCRIPT IS SUPPOSED TO BE AN EXAMPLE. MODIFY IT ACCORDING TO YOUR NEEDS! [tox] -envlist = py36, py37, py38, flake8 +envlist = py36, py37, py38, py39, flake8 [testenv] @@ -31,3 +31,4 @@ python = 3.6: py36 3.7: py37, flake8 3.8: py38 + 3.9: py39
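
A minimal usage sketch (not part of the patch) of the regression-metrics path introduced above, mirroring the test_track_metrics test; it assumes the whylogs package from this branch is installed and that the testdata parquet file added in this change is available at the path shown.

import pandas as pd

from whylogs.core import DatasetProfile
from whylogs.proto import ModelType

# Validation data with "targets" and "predictions" columns, as in the test fixtures above.
df = pd.read_parquet("testdata/metrics/2021-02-12.parquet")

profile = DatasetProfile(name="regression-example")

# Continuous targets are routed to RegressionMetrics; passing model_type is optional
# and only makes the intent explicit (it is otherwise inferred from the target type).
profile.track_metrics(
    targets=df["targets"].to_list(),
    predictions=df["predictions"].to_list(),
    model_type=ModelType.REGRESSION,
)

reg = profile.model_profile.metrics.regression_metrics
print(reg.mean_absolute_error(), reg.mean_squared_error(), reg.root_mean_squared_error())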