Merge b48d1c1 into f95d424

whylabs · Mar 25, 2021 · 490cce5
2 parents f95d424 + b48d1c1
commit 490cce5
Show file tree

Hide file tree

Showing 34 changed files with 87 additions and 130 deletions.
diff --git a/scripts/profiler.py b/scripts/profiler.py
@@ -73,8 +73,7 @@ def csv_reader(f, date_format: str = None, dropna=False, infer_dtypes=False, **k
 
     for batch in pd.read_csv(f, **opts):
         records = df_to_records(batch, dropna=dropna)
-        for record in records:
-            yield record
+        yield from records
 
 
 def run(

diff --git a/src/whylogs/app/config.py b/src/whylogs/app/config.py
@@ -256,13 +256,11 @@ def load_config(path_to_config: str = None):
                     return session_config
             except IOError as e:
                 logger.warning("Failed to load YAML config", e)
-                pass
     else:
         try:
             with open(path_to_config, "rt") as f:
                 session_config = SessionConfig.from_yaml(f)
                 return session_config
         except IOError as e:
             logger.warning("Failed to load YAML config", e)
-            pass
     return None
diff --git a/src/whylogs/app/logger.py b/src/whylogs/app/logger.py
@@ -124,9 +124,8 @@ def segmented_profiles(self, ) -> Dict[str, DatasetProfile]:
 
     def get_segment(self, segment: Segment) -> Optional[DatasetProfile]:
         hashed_seg = hash_segment(segment)
-        segment_profile = self._profiles[-1]["segmented_profiles"].get(
+        return self._profiles[-1]["segmented_profiles"].get(
             hashed_seg, None)
-        return segment_profile
 
     def set_segments(self, segments: Union[List[Segment], List[str]]) -> None:
         if segments:
@@ -186,18 +185,15 @@ def _set_rotation(self, with_rotation_time: str = None):
             self.rotate_at = self.rotate_when(current_time)
 
     def rotate_when(self, time):
-        result = time + self.interval
-        return result
+        return time + self.interval
 
     def should_rotate(self, ):
 
         if self.with_rotation_time is None:
             return False
 
         current_time = int(datetime.datetime.utcnow().timestamp())
-        if current_time >= self.rotate_at:
-            return True
-        return False
+        return current_time >= self.rotate_at
 
     def _rotate_time(self):
         """
@@ -212,7 +208,7 @@ def _rotate_time(self):
             time_tuple.strftime(self.suffix), self.suffix)
 
         # modify the segment datetime stamps
-        if (self.segments is None) or ((self.segments is not None) and self.profile_full_dataset):
+        if self.segments is None or self.profile_full_dataset:
             self._profiles[-1]["full_profile"].dataset_timestamp = log_datetime
         if self.segments is not None:
             for _, each_prof in self._profiles[-1]["segmented_profiles"].items():
@@ -255,11 +251,10 @@ def flush(self, rotation_suffix: str = None):
                 for hashseg, each_seg_prof in self._profiles[-1]["segmented_profiles"].items():
                     seg_suffix = hashseg
                     full_suffix = "_" + seg_suffix
-                    if rotation_suffix is None:
-                        writer.write(each_seg_prof, full_suffix)
-                    else:
+                    if rotation_suffix is not None:
                         full_suffix += rotation_suffix
-                        writer.write(each_seg_prof, full_suffix)
+
+                    writer.write(each_seg_prof, full_suffix)
 
     def full_profile_check(self, ) -> bool:
         """
@@ -413,7 +408,7 @@ def log_local_dataset(self, root_dir, folder_feature_name="folder_feature", imag
             if isinstance(data, pd.DataFrame):
                 self.log_dataframe(data)
 
-            elif isinstance(data, Dict) or isinstance(data, list):
+            elif isinstance(data, (Dict, list)):
                 self.log_annotation(annotation_data=data)
             elif isinstance(data, ImageType):
                 if image_feature_transforms:
@@ -508,10 +503,11 @@ def log_segments_keys(self, data):
         for each_segment in segments:
             try:
                 segment_df = grouped_data.get_group(each_segment)
-                segment_tags = []
-                for i in range(len(self.segments)):
-                    segment_tags.append(
-                        {"key": self.segments[i], "value": each_segment[i]})
+                segment_tags = [
+                    {"key": self.segments[i], "value": each_segment[i]}
+                    for i in range(len(self.segments))
+                ]
+
                 self.log_df_segment(segment_df, segment_tags)
             except KeyError:
                 continue
@@ -522,7 +518,7 @@ def log_fixed_segments(self, data):
         for segment_tag in self.segments:
             # create keys
             segment_keys = [feature["key"] for feature in segment_tag]
-            seg = tuple([feature["value"] for feature in segment_tag])
+            seg = tuple(feature["value"] for feature in segment_tag)
 
             grouped_data = data.groupby(segment_keys)
 

diff --git a/src/whylogs/app/session.py b/src/whylogs/app/session.py
@@ -267,20 +267,18 @@ def new_profile(
             session_timestamp = self._session_time
 
         if tags is None:
-            tags = dict()
+            tags = {}
         if self.pipeline:
             tags["Pipeline"] = self.pipeline
 
-        profile = DatasetProfile(
+        return DatasetProfile(
             dataset_name,
             dataset_timestamp=dataset_timestamp,
             session_timestamp=session_timestamp,
             tags=tags,
             metadata=metadata,
         )
 
-        return profile
-
     def close(self):
         """
         Deactivate this session and flush all associated loggers

diff --git a/src/whylogs/app/writers.py b/src/whylogs/app/writers.py
@@ -97,8 +97,7 @@ def path_suffix(self, profile: DatasetProfile):
         applying the path templating defined in `self.path_template`
         """
         kwargs = self.template_params(profile)
-        path = self.path_template.substitute(**kwargs)
-        return path
+        return self.path_template.substitute(**kwargs)
 
     def file_name(self, profile: DatasetProfile, file_extension: str):
         """

diff --git a/src/whylogs/cli/demo_cli.py b/src/whylogs/cli/demo_cli.py
@@ -161,10 +161,10 @@ def init(project_dir):
         )
         if should_open:
             webbrowser.open("https://try.whylabsapp.com/?utm_source=whylogs")
-        echo(DONE)
     else:
         echo("Skip initial profiling and notebook generation")
-        echo(DONE)
+
+    echo(DONE)
 
 
 def profile_csv(session_config: SessionConfig, project_dir: str) -> str:

diff --git a/src/whylogs/core/annotation_profiling.py b/src/whylogs/core/annotation_profiling.py
@@ -62,9 +62,7 @@ def intersection(self, Rectangle_2):
         y_bottom = min(self.y2, Rectangle_2.y2)
         if x_right < x_left or y_bottom < y_top:
             return 0.0
-        intersection_area = (x_right - x_left) * (y_bottom - y_top)
-
-        return intersection_area
+        return (x_right - x_left) * (y_bottom - y_top)
 
     def iou(self, Rectangle_2):
         intersection_area = self.intersection(Rectangle_2)

diff --git a/src/whylogs/core/columnprofile.py b/src/whylogs/core/columnprofile.py
@@ -159,7 +159,7 @@ def generate_constraints(self) -> SummaryConstraints:
             if summ.min > 0:
                 items = [SummaryConstraint(op=Op.GT, first_field='min', value=0)]
             # generate additional constraints here
-            if len(items) > 0:
+            if items:
                 return SummaryConstraints(items)
 
         return None

diff --git a/src/whylogs/core/datasetprofile.py b/src/whylogs/core/datasetprofile.py
@@ -131,9 +131,9 @@ def __init__(
         if columns is None:
             columns = {}
         if tags is None:
-            tags = dict()
+            tags = {}
         if metadata is None:
-            metadata = dict()
+            metadata = {}
         if session_id is None:
             session_id = uuid4().hex
 
@@ -826,10 +826,8 @@ def flatten_dataset_frequent_strings(dataset_summary: DatasetSummary):
         try:
             item_summary = getter(
                 getter(col, "string_summary"), "frequent").items
-            items = {}
-            for item in item_summary:
-                items[item.value] = int(item.estimate)
-            if len(items) > 0:
+            items = {item.value: int(item.estimate) for item in item_summary}
+            if items:
                 frequent_strings[col_name] = items
         except KeyError:
             continue

diff --git a/src/whylogs/core/image_profiling.py b/src/whylogs/core/image_profiling.py
@@ -41,8 +41,7 @@
 def image_loader(path: str = None) -> ImageType:
     from PIL import Image
     with open(path, "rb") as file_p:
-        img = Image.open(file_p).copy()
-        return img
+        return Image.open(file_p).copy()
 
 
 class TrackImage:
@@ -67,11 +66,7 @@ def __init__(self,
         if filepath is None and img is None:
             raise ValueError("Need image filepath or image data")
 
-        if filepath is not None:
-            self.img = image_loader(filepath)
-        else:
-            self.img = img
-
+        self.img = image_loader(filepath) if filepath is not None else img
         self.feature_transforms = feature_transforms
 
         if feature_transforms is None:

diff --git a/src/whylogs/core/model_profile.py b/src/whylogs/core/model_profile.py
@@ -88,10 +88,7 @@ def to_protobuf(self):
     @classmethod
     def from_protobuf(cls, message: ModelProfileMessage):
         # convert google.protobuf.pyext._message.RepeatedScalarContainer to a list
-        output_fields = []
-        for f in message.output_fields:
-            output_fields.append(f)
-
+        output_fields = [f for f in message.output_fields]
         return ModelProfile(output_fields=output_fields,
                             metrics=ModelMetrics.from_protobuf(message.metrics))
 

diff --git a/src/whylogs/core/statistics/constraints.py b/src/whylogs/core/statistics/constraints.py
@@ -182,22 +182,21 @@ def from_protobuf(msg: SummaryConstraintMsg) -> 'SummaryConstraint':
 
     def to_protobuf(self) -> SummaryConstraintMsg:
         if self.second_field is None:
-            msg = SummaryConstraintMsg(
+            return SummaryConstraintMsg(
                 name=self.name,
                 first_field=self.first_field,
                 op=self.op,
                 value=self.value,
                 verbose=self._verbose,
             )
         else:
-            msg = SummaryConstraintMsg(
+            return SummaryConstraintMsg(
                 name=self.name,
                 first_field=self.first_field,
                 op=self.op,
                 second_field=self.second_field,
                 verbose=self._verbose,
             )
-        return msg
 
     def report(self):
         return (self.name, self.total, self.failures)
@@ -210,13 +209,13 @@ def __init__(self, constraints: List[ValueConstraint] = []):
     @staticmethod
     def from_protobuf(msg: ValueConstraintMsgs) -> 'ValueConstraints':
         v = [ValueConstraint.from_protobuf(c) for c in msg.constraints]
-        if len(v) > 0:
+        if v:
             return ValueConstraints(v)
         return None
 
     def to_protobuf(self) -> ValueConstraintMsgs:
         v = [c.to_protobuf() for c in self.constraints]
-        if len(v) > 0:
+        if v:
             vcmsg = ValueConstraintMsgs()
             vcmsg.constraints.extend(v)
             return vcmsg
@@ -228,7 +227,7 @@ def update(self, v):
 
     def report(self) -> List[tuple]:
         v = [c.report() for c in self.constraints]
-        if len(v) > 0:
+        if v:
             return v
         return None
 
@@ -240,13 +239,13 @@ def __init__(self, constraints: List[SummaryConstraint]):
     @staticmethod
     def from_protobuf(msg: SummaryConstraintMsgs) -> 'SummaryConstraints':
         v = [SummaryConstraint.from_protobuf(c) for c in msg.constraints]
-        if len(v) > 0:
+        if v:
             return SummaryConstraints(v)
         return None
 
     def to_protobuf(self) -> SummaryConstraintMsgs:
         v = [c.to_protobuf() for c in self.constraints]
-        if len(v) > 0:
+        if v:
             scmsg = SummaryConstraintMsgs()
             scmsg.constraints.extend(v)
             return scmsg
@@ -258,7 +257,7 @@ def update(self, v):
 
     def report(self) -> List[tuple]:
         v = [c.report() for c in self.constraints]
-        if len(v) > 0:
+        if v:
             return v
         return None
 
@@ -278,7 +277,7 @@ def __init__(self,
                 value_constraints[k] = ValueConstraints(v)
         self.value_constraint_map = value_constraints
         if summary_constraints is None:
-            summary_constraints = dict()
+            summary_constraints = {}
         for k, v in summary_constraints.items():
             if isinstance(v, list):
                 summary_constraints[k] = SummaryConstraints(v)

diff --git a/src/whylogs/core/statistics/numbertracker.py b/src/whylogs/core/statistics/numbertracker.py
@@ -144,8 +144,7 @@ def to_protobuf(self):
             opts["doubles"] = self.floats.to_protobuf()
         elif self.ints.count > 0:
             opts["longs"] = self.ints.to_protobuf()
-        msg = NumbersMessage(**opts)
-        return msg
+        return NumbersMessage(**opts)
 
     @staticmethod
     def from_protobuf(message: NumbersMessage):

diff --git a/src/whylogs/core/statistics/schematracker.py b/src/whylogs/core/statistics/schematracker.py
@@ -73,7 +73,7 @@ def infer_type(self):
 
         # Integral is considered a subset of fractional here
         fractional_count = sum(
-            [type_counts.get(k, 0) for k in (Type.INTEGRAL, Type.FRACTIONAL)]
+            type_counts.get(k, 0) for k in (Type.INTEGRAL, Type.FRACTIONAL)
         )
 
         if (
@@ -82,11 +82,15 @@ def infer_type(self):
         ):
             # treat everything else as "String" except UNKNOWN
             coerced_count = sum(
-                [
-                    type_counts.get(k, 0)
-                    for k in (Type.INTEGRAL, Type.FRACTIONAL, Type.STRING, Type.BOOLEAN)
-                ]
+                type_counts.get(k, 0)
+                for k in (
+                    Type.INTEGRAL,
+                    Type.FRACTIONAL,
+                    Type.STRING,
+                    Type.BOOLEAN,
+                )
             )
+
             actual_ratio = float(coerced_count) / total_count
             return InferredType(type=Type.STRING, ratio=actual_ratio)
 

diff --git a/src/whylogs/core/statistics/thetasketch.py b/src/whylogs/core/statistics/thetasketch.py
@@ -23,10 +23,7 @@ class ThetaSketch:
     def __init__(self, theta_sketch=None, union=None, compact_theta=None):
         if theta_sketch is None:
             theta_sketch = datasketches.update_theta_sketch()
-        if union is None:
-            union = datasketches.theta_union()
-        else:
-            union = _copy_union(union)
+        union = datasketches.theta_union() if union is None else _copy_union(union)
         if compact_theta is not None:
             union.update(compact_theta)
 

diff --git a/src/whylogs/features/transforms.py b/src/whylogs/features/transforms.py
@@ -157,8 +157,7 @@ def __call__(self, img: Union[ImageType, np.ndarray]) -> float:
         img = img.convert("RGB")
         img = img.filter(ImageFilter.Kernel((3, 3), (-1, -1, -1, -1, 8,
                                                      -1, -1, -1, -1), 1, 0))
-        value = np.var(np.array(img).flatten()).reshape((-1, 1))
-        return value
+        return np.var(np.array(img).flatten()).reshape((-1, 1))
 
     def __repr__(self):
         return self.__class__.__name__
diff --git a/src/whylogs/io/file_loader.py b/src/whylogs/io/file_loader.py
@@ -144,7 +144,7 @@ def file_loader(path: str, valid_file: Callable[[str], bool] = valid_file) -> An
     if ext in IMAGE_EXTENSIONS:
         data, file_format = image_loader(path)
         return (data, magic_data), file_format
-    elif ((ext == ".json") or (ext == ".jsonl")):
+    elif ext in [".json", ".jsonl"]:
         data, file_format = json_loader(path)
         return (data, magic_data), file_format
     elif (ext == ".csv"):