Skip to content

Commit

Permalink
Merge f47c445 into f95d424
Browse files Browse the repository at this point in the history
  • Loading branch information
lalmei committed Mar 25, 2021
2 parents f95d424 + f47c445 commit 2689210
Show file tree
Hide file tree
Showing 19 changed files with 407 additions and 82 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.3.3-dev5
current_version = 0.4.1-dev0
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
# built documents.
#
# The short X.Y version.
version = "0.3.3-dev5"
version = "0.4.1-dev0"
# The full version, including alpha/beta/rc tags.
release = "" # Is set by calling `setup.py docs`

Expand Down
2 changes: 1 addition & 1 deletion proto
Submodule proto updated 1 files
+18 −0 src/messages.proto
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ colorama==0.4.4
coverage==5.3
cryptography==3.3.2
cycler==0.10.0
scikit-learn==0.24.1
databricks-cli==0.14.1
decorator==4.4.2
distlib==0.3.1
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

[metadata]
name = whylogs
version = 0.3.3-dev5
version = 0.4.1-dev0
description = Profile and monitor your ML data pipeline end-to-end
author = WhyLabs.ai
author-email = support@whylabs.ai
Expand Down
2 changes: 1 addition & 1 deletion src/whylogs/_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""WhyLabs version number."""

__version__ = "0.3.3-dev5"
__version__ = "0.4.1-dev0"
46 changes: 24 additions & 22 deletions src/whylogs/app/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from whylogs.core import DatasetProfile, TrackImage, METADATA_DEFAULT_ATTRIBUTES, TrackBB
from whylogs.core.statistics.constraints import DatasetConstraints
from whylogs.io import LocalDataset

from whylogs.proto import ModelType

# TODO upgrade to Classes
SegmentTag = Dict[str, any]
Expand Down Expand Up @@ -124,9 +124,8 @@ def segmented_profiles(self, ) -> Dict[str, DatasetProfile]:

def get_segment(self, segment: Segment) -> Optional[DatasetProfile]:
hashed_seg = hash_segment(segment)
segment_profile = self._profiles[-1]["segmented_profiles"].get(
return self._profiles[-1]["segmented_profiles"].get(
hashed_seg, None)
return segment_profile

def set_segments(self, segments: Union[List[Segment], List[str]]) -> None:
if segments:
Expand Down Expand Up @@ -186,18 +185,15 @@ def _set_rotation(self, with_rotation_time: str = None):
self.rotate_at = self.rotate_when(current_time)

def rotate_when(self, time):
result = time + self.interval
return result
return time + self.interval

def should_rotate(self, ):

if self.with_rotation_time is None:
return False

current_time = int(datetime.datetime.utcnow().timestamp())
if current_time >= self.rotate_at:
return True
return False
return current_time >= self.rotate_at

def _rotate_time(self):
"""
Expand All @@ -212,7 +208,7 @@ def _rotate_time(self):
time_tuple.strftime(self.suffix), self.suffix)

# modify the segment datetime stamps
if (self.segments is None) or ((self.segments is not None) and self.profile_full_dataset):
if self.segments is None or self.profile_full_dataset:
self._profiles[-1]["full_profile"].dataset_timestamp = log_datetime
if self.segments is not None:
for _, each_prof in self._profiles[-1]["segmented_profiles"].items():
Expand Down Expand Up @@ -255,11 +251,10 @@ def flush(self, rotation_suffix: str = None):
for hashseg, each_seg_prof in self._profiles[-1]["segmented_profiles"].items():
seg_suffix = hashseg
full_suffix = "_" + seg_suffix
if rotation_suffix is None:
writer.write(each_seg_prof, full_suffix)
else:
if rotation_suffix is not None:
full_suffix += rotation_suffix
writer.write(each_seg_prof, full_suffix)

writer.write(each_seg_prof, full_suffix)

def full_profile_check(self, ) -> bool:
"""
Expand Down Expand Up @@ -342,13 +337,19 @@ def log_segment_datum(self, feature_name, value):

def log_metrics(self,
targets, predictions,
scores=None, target_field=None, prediction_field=None,
scores=None,
model_type: ModelType = None,
target_field=None,
prediction_field=None,
score_field=None):

self._profiles[-1]["full_profile"].track_metrics(
targets, predictions, scores, target_field=target_field,
targets, predictions, scores,
model_type=model_type,
target_field=target_field,
prediction_field=prediction_field,
score_field=score_field)
score_field=score_field,
)

def log_image(self,
image,
Expand Down Expand Up @@ -413,7 +414,7 @@ def log_local_dataset(self, root_dir, folder_feature_name="folder_feature", imag
if isinstance(data, pd.DataFrame):
self.log_dataframe(data)

elif isinstance(data, Dict) or isinstance(data, list):
elif isinstance(data, (Dict, list)):
self.log_annotation(annotation_data=data)
elif isinstance(data, ImageType):
if image_feature_transforms:
Expand Down Expand Up @@ -508,10 +509,11 @@ def log_segments_keys(self, data):
for each_segment in segments:
try:
segment_df = grouped_data.get_group(each_segment)
segment_tags = []
for i in range(len(self.segments)):
segment_tags.append(
{"key": self.segments[i], "value": each_segment[i]})
segment_tags = [
{"key": self.segments[i], "value": each_segment[i]}
for i in range(len(self.segments))
]

self.log_df_segment(segment_df, segment_tags)
except KeyError:
continue
Expand All @@ -522,7 +524,7 @@ def log_fixed_segments(self, data):
for segment_tag in self.segments:
# create keys
segment_keys = [feature["key"] for feature in segment_tag]
seg = tuple([feature["value"] for feature in segment_tag])
seg = tuple(feature["value"] for feature in segment_tag)

grouped_data = data.groupby(segment_keys)

Expand Down
37 changes: 25 additions & 12 deletions src/whylogs/core/datasetprofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
DatasetProperties,
DatasetSummary,
MessageSegment,
ModelType
)
from whylogs.core.statistics.constraints import DatasetConstraints, SummaryConstraints
from whylogs.util import time
Expand Down Expand Up @@ -131,9 +132,9 @@ def __init__(
if columns is None:
columns = {}
if tags is None:
tags = dict()
tags = {}
if metadata is None:
metadata = dict()
metadata = {}
if session_id is None:
session_id = uuid4().hex

Expand Down Expand Up @@ -189,9 +190,14 @@ def add_output_field(self, field: Union[str, List[str]]):
else:
self.model_profile.add_output_field(field)

def track_metrics(self, targets: List[Union[str, bool, float, int]], predictions: List[Union[str, bool, float, int]], scores: List[float] = None,
target_field: str = None, prediction_field: str = None,
score_field: str = None):
def track_metrics(self,
targets: List[Union[str, bool, float, int]],
predictions: List[Union[str, bool, float, int]],
scores: List[float] = None,
model_type: ModelType = None,
target_field: str = None,
prediction_field: str = None,
score_field: str = None, ):
"""
Function to track metrics based on validation data.
Expand All @@ -206,6 +212,14 @@ def track_metrics(self, targets: List[Union[str, bool, float, int]], predictions
inferred/predicted values
scores : List[float], optional
associated scores for each inference; all values set to 1 if not passed
target_field : str, optional
Description
prediction_field : str, optional
Description
score_field : str, optional
Description
model_type : ModelType, optional
Default is Classification type.
target_field : str, optional
prediction_field : str, optional
score_field : str, optional
Expand All @@ -214,8 +228,8 @@ def track_metrics(self, targets: List[Union[str, bool, float, int]], predictions
"""
if self.model_profile is None:
self.model_profile = ModelProfile()
self.model_profile.compute_metrics(predictions, targets,
scores, target_field=target_field,
self.model_profile.compute_metrics(predictions=predictions, targets=targets,
scores=scores, model_type=model_type, target_field=target_field,
prediction_field=prediction_field,
score_field=score_field)

Expand Down Expand Up @@ -350,7 +364,8 @@ def generate_constraints(self) -> DatasetConstraints:
Protobuf constraints message.
"""
self.validate()
constraints = [(name, col.generate_constraints()) for name, col in self.columns.items()]
constraints = [(name, col.generate_constraints())
for name, col in self.columns.items()]
# filter empty constraints
constraints = [(n, c) for n, c in constraints if c is not None]
return DatasetConstraints(self.to_properties(), None, dict(constraints))
Expand Down Expand Up @@ -826,10 +841,8 @@ def flatten_dataset_frequent_strings(dataset_summary: DatasetSummary):
try:
item_summary = getter(
getter(col, "string_summary"), "frequent").items
items = {}
for item in item_summary:
items[item.value] = int(item.estimate)
if len(items) > 0:
items = {item.value: int(item.estimate) for item in item_summary}
if items:
frequent_strings[col_name] = items
except KeyError:
continue
Expand Down
66 changes: 56 additions & 10 deletions src/whylogs/core/metrics/model_metrics.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from typing import List, Union

from whylogs.core.metrics.confusion_matrix import ConfusionMatrix
from whylogs.proto import ModelMetricsMessage
from whylogs.core.metrics.regression_metrics import RegressionMetrics
from whylogs.proto import ModelMetricsMessage, ModelType


class ModelMetrics:
Expand All @@ -10,22 +11,45 @@ class ModelMetrics:
Attributes:
confusion_matrix (ConfusionMatrix): ConfusionMatrix which keeps track of counts with NumberTracker
regression_metrics (RegressionMetrics): RegressionMetrics keeps track of common regression metrics in case the targets are continuous.
"""

def __init__(self, confusion_matrix: ConfusionMatrix = None):
if confusion_matrix is None:
confusion_matrix = ConfusionMatrix()
def __init__(self, confusion_matrix: ConfusionMatrix = None,
regression_metrics: RegressionMetrics = None,
model_type: ModelType = ModelType.UNKNOWN):

self.model_type = model_type
if confusion_matrix is not None and regression_metrics is not None:
raise NotImplementedError("Regression Metrics together with Confusion Matrix not implemented yet")

if confusion_matrix is not None:
if (self.model_type == ModelType.REGRESSION):
raise NotImplementedError("Incorrect model type")
self.model_type = ModelType.CLASSIFICATION

self.confusion_matrix = confusion_matrix

if regression_metrics is not None:
if (self.model_type == ModelType.CLASSIFICATION):
raise NotImplementedError("Incorrect model type")
self.model_type = ModelType.REGRESSION
self.regression_metrics = regression_metrics

def to_protobuf(self, ) -> ModelMetricsMessage:
return ModelMetricsMessage(scoreMatrix=self.confusion_matrix.to_protobuf() if self.confusion_matrix else None)
return ModelMetricsMessage(
scoreMatrix=self.confusion_matrix.to_protobuf() if self.confusion_matrix else None,
regressionMetrics=self.regression_metrics.to_protobuf() if self.regression_metrics else None,
modelType=self.model_type)

@classmethod
def from_protobuf(cls, message, ):
return ModelMetrics(confusion_matrix=ConfusionMatrix.from_protobuf(message.scoreMatrix))
return ModelMetrics(
confusion_matrix=ConfusionMatrix.from_protobuf(message.scoreMatrix),
regression_metrics=RegressionMetrics.from_protobuf(message.regressionMetrics),
model_type=message.modelType)

def compute_confusion_matrix(self, predictions: List[Union[str, int, bool]],
targets: List[Union[str, int, bool]],
def compute_confusion_matrix(self, predictions: List[Union[str, int, bool, float]],
targets: List[Union[str, int, bool, float]],
scores: List[float] = None,
target_field: str = None,
prediction_field: str = None,
Expand All @@ -48,12 +72,23 @@ def compute_confusion_matrix(self, predictions: List[Union[str, int, bool]],
score_field=score_field)
confusion_matrix.add(predictions, targets, scores)

if self.confusion_matrix.labels is None or self.confusion_matrix.labels == []:
if self.confusion_matrix is None or self.confusion_matrix.labels is None or self.confusion_matrix.labels == []:
self.confusion_matrix = confusion_matrix
else:
self.confusion_matrix = self.confusion_matrix.merge(
confusion_matrix)

def compute_regression_metrics(self, predictions: List[Union[float, int]],
targets: List[Union[float, int]],
target_field: str = None,
prediction_field: str = None):
regression_metrics = RegressionMetrics(target_field=target_field, prediction_field=prediction_field)
regression_metrics.add(predictions, targets)
if self.regression_metrics:
self.regression_metrics = self.regression_metrics.merge(regression_metrics)
else:
self.regression_metrics = regression_metrics

def merge(self, other):
"""
Expand All @@ -66,4 +101,15 @@ def merge(self, other):
return self
if self.confusion_matrix is None:
return other
return ModelMetrics(confusion_matrix=self.confusion_matrix.merge(other.confusion_matrix))

if self.model_type is None or other.model_type is None:
model_type = ModelType.UNKNOWN
elif other.model_type != self.model_type:
model_type = ModelType.UNKNOWN
else:
model_type = self.model_type

return ModelMetrics(
confusion_matrix=self.confusion_matrix.merge(other.confusion_matrix),
regression_metrics=self.regression_metrics.merge(other.regression_metrics),
model_type=model_type)
Loading

0 comments on commit 2689210

Please sign in to comment.