-
Notifications
You must be signed in to change notification settings - Fork 118
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
282 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
import math | ||
from typing import List | ||
|
||
# import numpy as np | ||
from sklearn.utils.multiclass import type_of_target | ||
|
||
from whylogs.proto import RegressionMetricsMessage | ||
|
||
# BUG FIX: ("regression") is just a parenthesized string, so `x in SUPPORTED_TYPES`
# performed a substring test; the trailing comma makes it a one-element tuple
# giving an exact membership test.
SUPPORTED_TYPES = ("regression",)
|
||
|
||
class RegressionMetrics:
    """
    Streaming accumulator of regression metrics (MAE, MSE, RMSE).

    Attributes:
        prediction_field (str): name of the field holding predictions
        target_field (str): name of the field holding targets
        count (int): number of (prediction, target) pairs seen so far
        sum_abs_diff (float): running sum of |prediction - target|
        sum_diff (float): running sum of (prediction - target)
        sum2_diff (float): running sum of (prediction - target) ** 2
    """

    def __init__(self,
                 prediction_field: str = None,
                 target_field: str = None):
        self.prediction_field = prediction_field
        self.target_field = target_field

        self.count = 0
        self.sum_abs_diff = 0.0
        self.sum_diff = 0.0
        self.sum2_diff = 0.0
        # to add later
        # self.nt_diff = whylogs.core.statistics.NumberTracker()

    def add(self, predictions: List[float],
            targets: List[float]):
        """
        Accumulate predictions and targets into the running regression sums.

        Args:
            predictions (List[float]): predicted values
            targets (List[float]): ground-truth values

        Raises:
            NotImplementedError: if the targets are not of a continuous
                (regression) target type
            ValueError: if predictions and targets differ in length
        """
        tgt_type = type_of_target(targets)
        # BUG FIX: ("continuous") is a plain string, so `not in` performed a
        # substring test (e.g. "tin" would pass); a one-element tuple gives an
        # exact membership test.
        if tgt_type not in ("continuous",):
            raise NotImplementedError(f"target type: {tgt_type} not supported for these metrics")

        if not isinstance(targets, list):
            targets = [targets]
        if not isinstance(predictions, list):
            predictions = [predictions]

        if len(targets) != len(predictions):
            raise ValueError(
                "both targets and predictions need to have the same length")
        # need to vectorize this
        for idx, target in enumerate(targets):
            # hoist the common difference instead of recomputing it three times
            diff = predictions[idx] - target
            self.sum_abs_diff += abs(diff)
            self.sum_diff += diff
            self.sum2_diff += diff**2
            # To add later
            # self.nt_diff.track(diff)
            self.count += 1

    def mean_absolute_error(self):
        """Return the mean absolute error, or None if nothing was added yet."""
        if self.count == 0:
            return None
        return self.sum_abs_diff / self.count

    def mean_squared_error(self):
        """Return the mean squared error, or None if nothing was added yet."""
        if self.count == 0:
            return None
        return self.sum2_diff / self.count

    def root_mean_squared_error(self):
        """Return the root mean squared error, or None if nothing was added yet."""
        if self.count == 0:
            return None
        return math.sqrt(self.sum2_diff / self.count)

    def merge(self, other_reg_met):
        """
        Merge two RegressionMetrics accumulators.

        Args:
            other_reg_met (Optional[RegressionMetrics]): metrics to merge
                with self; None is treated as empty.

        Returns:
            RegressionMetrics: a new object holding the combined sums
                (implements the former TODO: always return new objects).
        """
        # Tolerate None, as advertised by the Optional annotation above.
        if other_reg_met is None:
            other_reg_met = RegressionMetrics()

        # Prefer our own field names; fall back to the other side's so merging
        # into a freshly-constructed (empty) object keeps the metadata.
        pred_field = (self.prediction_field
                      if self.prediction_field is not None
                      else other_reg_met.prediction_field)
        tgt_field = (self.target_field
                     if self.target_field is not None
                     else other_reg_met.target_field)

        new_reg = RegressionMetrics(prediction_field=pred_field,
                                    target_field=tgt_field)
        new_reg.count = self.count + other_reg_met.count
        new_reg.sum_abs_diff = self.sum_abs_diff + other_reg_met.sum_abs_diff
        new_reg.sum_diff = self.sum_diff + other_reg_met.sum_diff
        new_reg.sum2_diff = self.sum2_diff + other_reg_met.sum2_diff

        return new_reg

    def to_protobuf(self):
        """
        Convert to protobuf.

        Returns:
            RegressionMetricsMessage: serialized form of this accumulator
        """
        return RegressionMetricsMessage(
            prediction_field=self.prediction_field,
            target_field=self.target_field,
            count=self.count,
            sum_abs_diff=self.sum_abs_diff,
            sum_diff=self.sum_diff,
            sum2_diff=self.sum2_diff)

    @classmethod
    def from_protobuf(cls, message: "RegressionMetricsMessage"):
        """
        Rebuild a RegressionMetrics from its protobuf message.

        Args:
            message (RegressionMetricsMessage): serialized metrics

        Returns:
            Optional[RegressionMetrics]: None when the message is empty
        """
        if message.ByteSize() == 0:
            return None

        # use cls so subclasses deserialize to their own type
        reg_met = cls()
        reg_met.count = message.count
        reg_met.sum_abs_diff = message.sum_abs_diff
        reg_met.sum_diff = message.sum_diff
        reg_met.sum2_diff = message.sum2_diff

        return reg_met
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import os | ||
|
||
import pandas as pd | ||
import pytest | ||
|
||
from whylogs.core.metrics.regression_metrics import RegressionMetrics | ||
from whylogs.proto import RegressionMetricsMessage | ||
|
||
|
||
|
||
# Absolute path to the repository-level "testdata" directory, four levels up
# from this test module.
_THIS_DIR = os.path.realpath(os.path.dirname(__file__))
TEST_DATA_PATH = os.path.abspath(
    os.path.join(_THIS_DIR, *[os.pardir] * 4, "testdata"))
|
||
# BUG FIX: the original was named `my_test`, which pytest does not collect,
# so this check never ran; renamed to follow the `test_*` convention.
def test_constructor_defaults():
    """A fresh RegressionMetrics starts with zeroed count and running sums."""
    regmet = RegressionMetrics()
    assert regmet.count == 0
    assert regmet.sum_diff == 0.0
    assert regmet.sum2_diff == 0.0
    assert regmet.sum_abs_diff == 0.0
|
||
|
||
def test_load_parquet():
    """End-to-end check of RegressionMetrics against precomputed values.

    Loads a parquet file of predictions/targets, accumulates the metrics,
    compares MAE/MSE/RMSE to known-good reference values, then round-trips
    through protobuf and checks the metrics survive serialization.
    """
    mean_absolute_error = 85.94534216005789
    mean_squared_error = 11474.89611670205
    root_mean_squared_error = 107.12094154133472

    regmet = RegressionMetrics()
    # FIX (idiom): the original wrapped os.path.join in a redundant second
    # os.path.join; a single call builds the same path.
    df = pd.read_parquet(
        os.path.join(TEST_DATA_PATH, "metrics", "2021-02-12.parquet"))
    predictions = df["predictions"].to_list()
    targets = df["targets"].to_list()
    regmet.add(predictions, targets)

    assert regmet.count == len(predictions)
    assert regmet.mean_squared_error() == pytest.approx(mean_squared_error, 0.01)
    assert regmet.mean_absolute_error() == pytest.approx(mean_absolute_error, 0.01)
    assert regmet.root_mean_squared_error() == pytest.approx(root_mean_squared_error, 0.01)

    msg = regmet.to_protobuf()
    new_regmet = RegressionMetrics.from_protobuf(msg)
    assert regmet.count == new_regmet.count
    assert regmet.mean_squared_error() == new_regmet.mean_squared_error()
    assert regmet.root_mean_squared_error() == new_regmet.root_mean_squared_error()
    assert regmet.mean_absolute_error() == new_regmet.mean_absolute_error()
|
||
def test_empty_protobuf_should_return_none():
    """Deserializing a default (zero-byte) message must yield None."""
    msg = RegressionMetricsMessage()
    assert RegressionMetrics.from_protobuf(msg) is None
|
||
|
||
|
Oops, something went wrong.