Merge fb9b7a8 into f8750a1
lalmei committed Mar 15, 2021
2 parents f8750a1 + fb9b7a8 commit c6ad6e4
Showing 10 changed files with 282 additions and 31 deletions.
2 changes: 1 addition & 1 deletion proto
Submodule proto updated 1 file
+18 −0 src/messages.proto
1 change: 1 addition & 0 deletions requirements-dev.txt
@@ -19,6 +19,7 @@ colorama==0.4.4
coverage==5.3
cryptography==3.3.2
cycler==0.10.0
+scikit-learn==0.24.1
databricks-cli==0.14.1
decorator==4.4.2
distlib==0.3.1
4 changes: 2 additions & 2 deletions src/whylogs/core/datasetprofile.py
@@ -214,8 +214,8 @@ def track_metrics(self, targets: List[Union[str, bool, float, int]], predictions
"""
if self.model_profile is None:
self.model_profile = ModelProfile()
-        self.model_profile.compute_metrics(predictions, targets,
-                                           scores, target_field=target_field,
+        self.model_profile.compute_metrics(predictions=predictions, targets=targets,
+                                           scores=scores, target_field=target_field,
                                            prediction_field=prediction_field,
                                            score_field=score_field)

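For orientation, here is a minimal sketch of calling the updated track_metrics with continuous (float) targets, which now routes to the regression metrics added below. The profile name and field names are invented for illustration:

from whylogs.core.datasetprofile import DatasetProfile

# Hypothetical profile; "regression-demo" and the field names are illustrative.
profile = DatasetProfile(name="regression-demo")
# Float targets are detected as continuous, so the regression branch runs;
# scores only apply to classification and can be omitted.
profile.track_metrics(targets=[0.5, 1.2, 3.4],
                      predictions=[0.4, 1.5, 3.1],
                      target_field="target",
                      prediction_field="prediction")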
35 changes: 27 additions & 8 deletions src/whylogs/core/metrics/model_metrics.py
@@ -1,6 +1,7 @@
from typing import List, Union

from whylogs.core.metrics.confusion_matrix import ConfusionMatrix
+from whylogs.core.metrics.regression_metrics import RegressionMetrics
from whylogs.proto import ModelMetricsMessage


@@ -12,20 +13,28 @@ class ModelMetrics:
        confusion_matrix (ConfusionMatrix): ConfusionMatrix, which keeps track of counts with a NumberTracker
"""

-    def __init__(self, confusion_matrix: ConfusionMatrix = None):
-        if confusion_matrix is None:
-            confusion_matrix = ConfusionMatrix()
+    def __init__(self, confusion_matrix: ConfusionMatrix = None,
+                 regression_metrics: RegressionMetrics = None):
+        # Create defaults per instance: default-argument instances would be
+        # built once at definition time and shared by every ModelMetrics.
+        if confusion_matrix is None:
+            confusion_matrix = ConfusionMatrix()
+        if regression_metrics is None:
+            regression_metrics = RegressionMetrics()
        self.confusion_matrix = confusion_matrix
+        self.regression_metrics = regression_metrics

def to_protobuf(self, ) -> ModelMetricsMessage:
-        return ModelMetricsMessage(scoreMatrix=self.confusion_matrix.to_protobuf() if self.confusion_matrix else None)
+        return ModelMetricsMessage(
+            scoreMatrix=self.confusion_matrix.to_protobuf() if self.confusion_matrix else None,
+            regressionMetrics=self.regression_metrics.to_protobuf() if self.regression_metrics else None)

@classmethod
def from_protobuf(cls, message, ):
-        return ModelMetrics(confusion_matrix=ConfusionMatrix.from_protobuf(message.scoreMatrix))
+        return ModelMetrics(
+            confusion_matrix=ConfusionMatrix.from_protobuf(message.scoreMatrix),
+            regression_metrics=RegressionMetrics.from_protobuf(message.regressionMetrics))

-    def compute_confusion_matrix(self, predictions: List[Union[str, int, bool]],
-                                 targets: List[Union[str, int, bool]],
+    def compute_confusion_matrix(self, predictions: List[Union[str, int, bool, float]],
+                                 targets: List[Union[str, int, bool, float]],
                                  scores: List[float] = None,
                                  target_field: str = None,
                                  prediction_field: str = None,
@@ -54,6 +63,14 @@ def compute_confusion_matrix(self, predictions: List[Union[str, int, bool]],
self.confusion_matrix = self.confusion_matrix.merge(
confusion_matrix)

+    def compute_regression_metrics(self, predictions: List[float],
+                                   targets: List[float],
+                                   target_field: str = None,
+                                   prediction_field: str = None):
+        regression_metrics = RegressionMetrics(target_field=target_field,
+                                               prediction_field=prediction_field)
+        regression_metrics.add(predictions, targets)
+        self.regression_metrics = self.regression_metrics.merge(regression_metrics)

def merge(self, other):
"""
@@ -66,4 +83,6 @@ def merge(self, other):
return self
if self.confusion_matrix is None:
return other
-        return ModelMetrics(confusion_matrix=self.confusion_matrix.merge(other.confusion_matrix))
+        return ModelMetrics(
+            confusion_matrix=self.confusion_matrix.merge(other.confusion_matrix),
+            regression_metrics=self.regression_metrics.merge(other.regression_metrics))
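The merge above now combines both metric types. A minimal sketch of merging regression metrics tracked on two separate batches (numbers invented; assumes merging the default empty confusion matrices is a no-op):

from whylogs.core.metrics.model_metrics import ModelMetrics

# Metrics tracked on two separate batches of the same regression model.
a = ModelMetrics()
a.compute_regression_metrics(predictions=[2.1, 3.9], targets=[2.2, 4.1])
b = ModelMetrics()
b.compute_regression_metrics(predictions=[1.0], targets=[1.5])

# Merging adds the running sums, so the result covers all three pairs.
combined = a.merge(b)
print(combined.regression_metrics.mean_absolute_error())  # (0.1 + 0.2 + 0.5) / 3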
128 changes: 128 additions & 0 deletions src/whylogs/core/metrics/regression_metrics.py
@@ -0,0 +1,128 @@
import math
from typing import List

# import numpy as np
from sklearn.utils.multiclass import type_of_target

from whylogs.proto import RegressionMetricsMessage

SUPPORTED_TYPES = ("regression",)


class RegressionMetrics:

def __init__(self,
prediction_field: str = None,
target_field: str = None):
self.prediction_field = prediction_field
self.target_field = target_field

self.count = 0
self.sum_abs_diff = 0.0
self.sum_diff = 0.0
self.sum2_diff = 0.0
# to add later
# self.nt_diff = whylogs.core.statistics.NumberTracker()

def add(self, predictions: List[float],
targets: List[float]):
"""
Function adds predictions and targets computation of regression metrics.
Args:
predictions (List[Union[str, int, bool]]):
targets (List[Union[str, int, bool]]):
Raises:
NotImplementedError: in case targets do not fall into binary or
multiclass suport
ValueError: incase missing validation or predictions
"""
tgt_type = type_of_target(targets)
if tgt_type not in ("continuous"):
raise NotImplementedError(f"target type: {tgt_type} not supported for these metrics")

if not isinstance(targets, list):
targets = [targets]
if not isinstance(predictions, list):
predictions = [predictions]

if len(targets) != len(predictions):
raise ValueError(
"both targets and predictions need to have the same length")
# need to vectorize this
for idx, target in enumerate(targets):

self.sum_abs_diff += abs(predictions[idx] - target)
self.sum_diff += predictions[idx] - target
self.sum2_diff += (predictions[idx] - target)**2
# To add later
# self.nt_diff.track(predictions[idx] - target)
self.count += 1

def mean_absolute_error(self):
if self.count == 0:
return None
return self.sum_abs_diff / self.count

def mean_squared_error(self):
if self.count == 0:
return None
return self.sum2_diff / self.count

def root_mean_squared_error(self):
if self.count == 0:
return None
return math.sqrt(self.sum2_diff / self.count)

def merge(self, other_reg_met):
"""
Merge two seperate confusion matrix which may or may not overlap in labels.
Args:
other_cm (Optional[ConfusionMatrix]): confusion_matrix to merge with self
Returns:
ConfusionMatrix: merged confusion_matrix
"""
# TODO: always return new objects
if self.count == 0:
return other_reg_met
if other_reg_met.count == 0:
return self

new_reg = RegressionMetrics()
new_reg.count = self.count + other_reg_met.count
new_reg.sum_abs_diff = self.sum_abs_diff + other_reg_met.sum_abs_diff
new_reg.sum_diff = self.sum_diff + other_reg_met.sum_diff
new_reg.sum2_diff = self.sum2_diff + other_reg_met.sum2_diff

return new_reg

def to_protobuf(self, ):
"""
Convert to protobuf
Returns:
TYPE: Protobuf Message
"""

return RegressionMetricsMessage(
prediction_field=self.prediction_field,
target_field=self.target_field,
count=self.count,
sum_abs_diff=self.sum_abs_diff,
sum_diff=self.sum_diff,
sum2_diff=self.sum2_diff)

@classmethod
def from_protobuf(cls, message: RegressionMetricsMessage, ):
if message.ByteSize() == 0:
return None

reg_met = RegressionMetrics()
reg_met.count = message.count
reg_met.sum_abs_diff = message.sum_abs_diff
reg_met.sum_diff = message.sum_diff
reg_met.sum2_diff = message.sum2_diff

return reg_met
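The running sums kept by this class are sufficient to recover all three metrics: mean_absolute_error = sum_abs_diff / count, mean_squared_error = sum2_diff / count, and root_mean_squared_error = sqrt(sum2_diff / count). A small worked sketch of the API and the protobuf round trip (numbers invented):

from whylogs.core.metrics.regression_metrics import RegressionMetrics

reg = RegressionMetrics(prediction_field="prediction", target_field="target")
reg.add(predictions=[2.5, 0.0, 2.1], targets=[3.0, -0.5, 2.0])

# count = 3, sum_abs_diff = 0.5 + 0.5 + 0.1 = 1.1, sum2_diff = 0.25 + 0.25 + 0.01 = 0.51
print(reg.mean_absolute_error())      # 1.1 / 3 ≈ 0.3667
print(reg.mean_squared_error())       # 0.51 / 3 = 0.17
print(reg.root_mean_squared_error())  # sqrt(0.17) ≈ 0.4123

# Note that from_protobuf above restores only the count and sums,
# not prediction_field/target_field.
restored = RegressionMetrics.from_protobuf(reg.to_protobuf())
assert restored.count == reg.count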
53 changes: 33 additions & 20 deletions src/whylogs/core/model_profile.py
@@ -1,11 +1,13 @@
from sklearn.utils.multiclass import type_of_target
import numpy as np

-from whylogs.proto import ModelProfileMessage
+from whylogs.proto import ModelProfileMessage, ModelType
from whylogs.core.metrics.model_metrics import ModelMetrics

SUPPORTED_TYPES = ("binary", "multiclass")


class ModelProfile:
"""
@@ -21,7 +23,8 @@ class ModelProfile:

def __init__(self,
output_fields=None,
-                 metrics: ModelMetrics = None):
+                 metrics: ModelMetrics = None,
+                 model_type: ModelType = ModelType.UNKNOWN):
super().__init__()

if output_fields is None:
@@ -30,6 +33,7 @@ def __init__(self,
if metrics is None:
metrics = ModelMetrics()
self.metrics = metrics
+        self.model_type = model_type

def add_output_field(self, field: str):
if field not in self.output_fields:
@@ -48,14 +52,14 @@ def compute_metrics(self, targets,
Parameters
----------
targets : List
-            targets (or actuals) for validation
+            targets (or actuals) for validation; if these are floats, the
+            model is assumed to be a regression model
        predictions : List
            predictions (or inferred values)
        scores : List, optional
-            associated scores for each prediction
+            associated scores for each prediction (for binary and multiclass problems)
        target_field : str, optional
        prediction_field : str, optional
-        score_field : str, optional
+        score_field : str, optional (for binary and multiclass problems)
Raises
@@ -64,21 +68,30 @@
"""
tgt_type = type_of_target(targets)
-        if tgt_type not in ("binary", "multiclass"):
-            raise NotImplementedError("target type not supported yet")
-        # if score are not present set them to 1.
-        if scores is None:
-            scores = np.ones(len(targets))
-
-        scores = np.array(scores)
-
-        # compute confusion_matrix
-        self.metrics.compute_confusion_matrix(predictions=predictions,
-                                              targets=targets,
-                                              scores=scores,
-                                              target_field=target_field,
-                                              prediction_field=prediction_field,
-                                              score_field=score_field)
+        if tgt_type in ("continuous",):
+            self.model_type = ModelType.REGRESSION
+            self.metrics.compute_regression_metrics(predictions=predictions,
+                                                    targets=targets,
+                                                    target_field=target_field,
+                                                    prediction_field=prediction_field)
+        elif tgt_type in ("binary", "multiclass"):
+            self.model_type = ModelType.CLASSIFICATION
+
+            # if scores are not present, set them all to 1
+            if scores is None:
+                scores = np.ones(len(targets))
+
+            scores = np.array(scores)
+
+            # compute the confusion matrix
+            self.metrics.compute_confusion_matrix(predictions=predictions,
+                                                  targets=targets,
+                                                  scores=scores,
+                                                  target_field=target_field,
+                                                  prediction_field=prediction_field,
+                                                  score_field=score_field)
+        else:
+            raise NotImplementedError(f"target type {tgt_type} not supported yet")

def to_protobuf(self):
return ModelProfileMessage(output_fields=self.output_fields,
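To make the dispatch above concrete, a small sketch (values invented; relies on sklearn's type_of_target, which reports non-integral floats as "continuous"):

from whylogs.core.model_profile import ModelProfile
from whylogs.proto import ModelType

profile = ModelProfile()
# Non-integral float targets -> "continuous" -> regression branch, no scores needed.
profile.compute_metrics(targets=[0.5, 1.2, 0.7],
                        predictions=[0.4, 1.5, 0.6])
assert profile.model_type == ModelType.REGRESSION

profile2 = ModelProfile()
# String labels -> "binary"/"multiclass" -> confusion-matrix branch.
profile2.compute_metrics(targets=["cat", "dog", "cat"],
                         predictions=["cat", "cat", "cat"])
assert profile2.model_type == ModelType.CLASSIFICATION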
Binary file added testdata/metrics/2021-02-12.parquet
Binary file added testdata/metrics/regression_java.bin
49 changes: 49 additions & 0 deletions tests/unit/core/metrics/test_regression_metrics.py
@@ -0,0 +1,49 @@
import os

import pandas as pd
import pytest

from whylogs.core.metrics.regression_metrics import RegressionMetrics
from whylogs.proto import RegressionMetricsMessage


TEST_DATA_PATH = os.path.abspath(os.path.join(
    os.path.realpath(os.path.dirname(__file__)),
    os.pardir, os.pardir, os.pardir, os.pardir, "testdata"))


def test_init():
    regmet = RegressionMetrics()
    assert regmet.count == 0
    assert regmet.sum_diff == 0.0
    assert regmet.sum2_diff == 0.0
    assert regmet.sum_abs_diff == 0.0


def test_load_parquet():
    mean_absolute_error = 85.94534216005789
    mean_squared_error = 11474.89611670205
    root_mean_squared_error = 107.12094154133472

    regmet = RegressionMetrics()
    df = pd.read_parquet(os.path.join(TEST_DATA_PATH, "metrics", "2021-02-12.parquet"))
    regmet.add(df["predictions"].to_list(), df["targets"].to_list())

    assert regmet.count == len(df["predictions"].to_list())
    assert regmet.mean_squared_error() == pytest.approx(mean_squared_error, 0.01)
    assert regmet.mean_absolute_error() == pytest.approx(mean_absolute_error, 0.01)
    assert regmet.root_mean_squared_error() == pytest.approx(root_mean_squared_error, 0.01)

    msg = regmet.to_protobuf()
    new_regmet = RegressionMetrics.from_protobuf(msg)
    assert regmet.count == new_regmet.count
    assert regmet.mean_squared_error() == new_regmet.mean_squared_error()
    assert regmet.root_mean_squared_error() == new_regmet.root_mean_squared_error()
    assert regmet.mean_absolute_error() == new_regmet.mean_absolute_error()


def test_empty_protobuf_should_return_none():
    empty_message = RegressionMetricsMessage()
    assert RegressionMetrics.from_protobuf(empty_message) is None
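Since scikit-learn is now pinned in requirements-dev.txt, the streaming results can also be cross-checked against its batch metrics; a sketch along the lines of the tests above (data invented):

import math

from sklearn.metrics import mean_absolute_error, mean_squared_error

from whylogs.core.metrics.regression_metrics import RegressionMetrics

predictions = [2.5, 0.0, 2.1, 7.8]
targets = [3.0, -0.5, 2.0, 7.1]

reg = RegressionMetrics()
reg.add(predictions, targets)

# The streaming sums should agree with sklearn's batch computations.
assert math.isclose(reg.mean_absolute_error(), mean_absolute_error(targets, predictions))
assert math.isclose(reg.mean_squared_error(), mean_squared_error(targets, predictions))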


