From a6cc7dbb6ed16085a6cad0b4da8d1e67999d93a6 Mon Sep 17 00:00:00 2001 From: pecop2 Date: Thu, 27 Jan 2022 17:45:09 +0100 Subject: [PATCH 1/2] Change total row number computation --- src/whylogs/core/datasetprofile.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/whylogs/core/datasetprofile.py b/src/whylogs/core/datasetprofile.py index a9827f064d..e2eb2c5d49 100644 --- a/src/whylogs/core/datasetprofile.py +++ b/src/whylogs/core/datasetprofile.py @@ -123,8 +123,6 @@ def __init__( self.model_profile = model_profile - self.column_row_dict = dict() - # Store Name attribute self._tags["name"] = name @@ -167,8 +165,7 @@ def session_timestamp_ms(self): @property def total_row_number(self): - dict_counts = self.column_row_dict.values() if len(self.column_row_dict) else [0] - return max(dict_counts) + return max([col_prof.counters.count for col_prof in self.columns.values()]) def add_output_field(self, field: Union[str, List[str]]): if self.model_profile is None: @@ -265,11 +262,6 @@ def track_datum(self, column_name, data, character_list=None, token_method=None) prof = ColumnProfile(column_name, constraints=constraints) self.columns[column_name] = prof - self.column_row_dict[column_name] = 0 - - # updating the map for every column name with increasing the number of tracked values - self.column_row_dict[column_name] += 1 - prof.track(data, character_list=None, token_method=None) def track_multi_column(self, columns): From 886837af1a6ce49b9233140637126eb00f6e496e Mon Sep 17 00:00:00 2001 From: pecop2 Date: Thu, 27 Jan 2022 18:20:22 +0100 Subject: [PATCH 2/2] Edge case not to throw a ValueError when no columns logged --- src/whylogs/core/datasetprofile.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/whylogs/core/datasetprofile.py b/src/whylogs/core/datasetprofile.py index e2eb2c5d49..3885982b17 100644 --- a/src/whylogs/core/datasetprofile.py +++ b/src/whylogs/core/datasetprofile.py @@ -165,7 +165,8 @@ def session_timestamp_ms(self): @property def total_row_number(self): - return max([col_prof.counters.count for col_prof in self.columns.values()]) + column_counts = [col_prof.counters.count for col_prof in self.columns.values()] if len(self.columns) else [0] + return max(column_counts) def add_output_field(self, field: Union[str, List[str]]): if self.model_profile is None: