Skip to content

Commit

Permalink
Merge b48d1c1 into f95d424
Browse files Browse the repository at this point in the history
  • Loading branch information
sourcery-ai[bot] committed Mar 25, 2021
2 parents f95d424 + b48d1c1 commit 490cce5
Show file tree
Hide file tree
Showing 34 changed files with 87 additions and 130 deletions.
3 changes: 1 addition & 2 deletions scripts/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ def csv_reader(f, date_format: str = None, dropna=False, infer_dtypes=False, **k

for batch in pd.read_csv(f, **opts):
records = df_to_records(batch, dropna=dropna)
for record in records:
yield record
yield from records


def run(
Expand Down
2 changes: 0 additions & 2 deletions src/whylogs/app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,13 +256,11 @@ def load_config(path_to_config: str = None):
return session_config
except IOError as e:
logger.warning("Failed to load YAML config", e)
pass
else:
try:
with open(path_to_config, "rt") as f:
session_config = SessionConfig.from_yaml(f)
return session_config
except IOError as e:
logger.warning("Failed to load YAML config", e)
pass
return None
32 changes: 14 additions & 18 deletions src/whylogs/app/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,8 @@ def segmented_profiles(self, ) -> Dict[str, DatasetProfile]:

def get_segment(self, segment: Segment) -> Optional[DatasetProfile]:
hashed_seg = hash_segment(segment)
segment_profile = self._profiles[-1]["segmented_profiles"].get(
return self._profiles[-1]["segmented_profiles"].get(
hashed_seg, None)
return segment_profile

def set_segments(self, segments: Union[List[Segment], List[str]]) -> None:
if segments:
Expand Down Expand Up @@ -186,18 +185,15 @@ def _set_rotation(self, with_rotation_time: str = None):
self.rotate_at = self.rotate_when(current_time)

def rotate_when(self, time):
result = time + self.interval
return result
return time + self.interval

def should_rotate(self, ):

if self.with_rotation_time is None:
return False

current_time = int(datetime.datetime.utcnow().timestamp())
if current_time >= self.rotate_at:
return True
return False
return current_time >= self.rotate_at

def _rotate_time(self):
"""
Expand All @@ -212,7 +208,7 @@ def _rotate_time(self):
time_tuple.strftime(self.suffix), self.suffix)

# modify the segment datetime stamps
if (self.segments is None) or ((self.segments is not None) and self.profile_full_dataset):
if self.segments is None or self.profile_full_dataset:
self._profiles[-1]["full_profile"].dataset_timestamp = log_datetime
if self.segments is not None:
for _, each_prof in self._profiles[-1]["segmented_profiles"].items():
Expand Down Expand Up @@ -255,11 +251,10 @@ def flush(self, rotation_suffix: str = None):
for hashseg, each_seg_prof in self._profiles[-1]["segmented_profiles"].items():
seg_suffix = hashseg
full_suffix = "_" + seg_suffix
if rotation_suffix is None:
writer.write(each_seg_prof, full_suffix)
else:
if rotation_suffix is not None:
full_suffix += rotation_suffix
writer.write(each_seg_prof, full_suffix)

writer.write(each_seg_prof, full_suffix)

def full_profile_check(self, ) -> bool:
"""
Expand Down Expand Up @@ -413,7 +408,7 @@ def log_local_dataset(self, root_dir, folder_feature_name="folder_feature", imag
if isinstance(data, pd.DataFrame):
self.log_dataframe(data)

elif isinstance(data, Dict) or isinstance(data, list):
elif isinstance(data, (Dict, list)):
self.log_annotation(annotation_data=data)
elif isinstance(data, ImageType):
if image_feature_transforms:
Expand Down Expand Up @@ -508,10 +503,11 @@ def log_segments_keys(self, data):
for each_segment in segments:
try:
segment_df = grouped_data.get_group(each_segment)
segment_tags = []
for i in range(len(self.segments)):
segment_tags.append(
{"key": self.segments[i], "value": each_segment[i]})
segment_tags = [
{"key": self.segments[i], "value": each_segment[i]}
for i in range(len(self.segments))
]

self.log_df_segment(segment_df, segment_tags)
except KeyError:
continue
Expand All @@ -522,7 +518,7 @@ def log_fixed_segments(self, data):
for segment_tag in self.segments:
# create keys
segment_keys = [feature["key"] for feature in segment_tag]
seg = tuple([feature["value"] for feature in segment_tag])
seg = tuple(feature["value"] for feature in segment_tag)

grouped_data = data.groupby(segment_keys)

Expand Down
6 changes: 2 additions & 4 deletions src/whylogs/app/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,20 +267,18 @@ def new_profile(
session_timestamp = self._session_time

if tags is None:
tags = dict()
tags = {}
if self.pipeline:
tags["Pipeline"] = self.pipeline

profile = DatasetProfile(
return DatasetProfile(
dataset_name,
dataset_timestamp=dataset_timestamp,
session_timestamp=session_timestamp,
tags=tags,
metadata=metadata,
)

return profile

def close(self):
"""
Deactivate this session and flush all associated loggers
Expand Down
3 changes: 1 addition & 2 deletions src/whylogs/app/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,7 @@ def path_suffix(self, profile: DatasetProfile):
applying the path templating defined in `self.path_template`
"""
kwargs = self.template_params(profile)
path = self.path_template.substitute(**kwargs)
return path
return self.path_template.substitute(**kwargs)

def file_name(self, profile: DatasetProfile, file_extension: str):
"""
Expand Down
4 changes: 2 additions & 2 deletions src/whylogs/cli/demo_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,10 @@ def init(project_dir):
)
if should_open:
webbrowser.open("https://try.whylabsapp.com/?utm_source=whylogs")
echo(DONE)
else:
echo("Skip initial profiling and notebook generation")
echo(DONE)

echo(DONE)


def profile_csv(session_config: SessionConfig, project_dir: str) -> str:
Expand Down
4 changes: 1 addition & 3 deletions src/whylogs/core/annotation_profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,7 @@ def intersection(self, Rectangle_2):
y_bottom = min(self.y2, Rectangle_2.y2)
if x_right < x_left or y_bottom < y_top:
return 0.0
intersection_area = (x_right - x_left) * (y_bottom - y_top)

return intersection_area
return (x_right - x_left) * (y_bottom - y_top)

def iou(self, Rectangle_2):
intersection_area = self.intersection(Rectangle_2)
Expand Down
2 changes: 1 addition & 1 deletion src/whylogs/core/columnprofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def generate_constraints(self) -> SummaryConstraints:
if summ.min > 0:
items = [SummaryConstraint(op=Op.GT, first_field='min', value=0)]
# generate additional constraints here
if len(items) > 0:
if items:
return SummaryConstraints(items)

return None
Expand Down
10 changes: 4 additions & 6 deletions src/whylogs/core/datasetprofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,9 @@ def __init__(
if columns is None:
columns = {}
if tags is None:
tags = dict()
tags = {}
if metadata is None:
metadata = dict()
metadata = {}
if session_id is None:
session_id = uuid4().hex

Expand Down Expand Up @@ -826,10 +826,8 @@ def flatten_dataset_frequent_strings(dataset_summary: DatasetSummary):
try:
item_summary = getter(
getter(col, "string_summary"), "frequent").items
items = {}
for item in item_summary:
items[item.value] = int(item.estimate)
if len(items) > 0:
items = {item.value: int(item.estimate) for item in item_summary}
if items:
frequent_strings[col_name] = items
except KeyError:
continue
Expand Down
9 changes: 2 additions & 7 deletions src/whylogs/core/image_profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@
def image_loader(path: str = None) -> ImageType:
from PIL import Image
with open(path, "rb") as file_p:
img = Image.open(file_p).copy()
return img
return Image.open(file_p).copy()


class TrackImage:
Expand All @@ -67,11 +66,7 @@ def __init__(self,
if filepath is None and img is None:
raise ValueError("Need image filepath or image data")

if filepath is not None:
self.img = image_loader(filepath)
else:
self.img = img

self.img = image_loader(filepath) if filepath is not None else img
self.feature_transforms = feature_transforms

if feature_transforms is None:
Expand Down
5 changes: 1 addition & 4 deletions src/whylogs/core/model_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,7 @@ def to_protobuf(self):
@classmethod
def from_protobuf(cls, message: ModelProfileMessage):
# convert google.protobuf.pyext._message.RepeatedScalarContainer to a list
output_fields = []
for f in message.output_fields:
output_fields.append(f)

output_fields = [f for f in message.output_fields]
return ModelProfile(output_fields=output_fields,
metrics=ModelMetrics.from_protobuf(message.metrics))

Expand Down
19 changes: 9 additions & 10 deletions src/whylogs/core/statistics/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,22 +182,21 @@ def from_protobuf(msg: SummaryConstraintMsg) -> 'SummaryConstraint':

def to_protobuf(self) -> SummaryConstraintMsg:
if self.second_field is None:
msg = SummaryConstraintMsg(
return SummaryConstraintMsg(
name=self.name,
first_field=self.first_field,
op=self.op,
value=self.value,
verbose=self._verbose,
)
else:
msg = SummaryConstraintMsg(
return SummaryConstraintMsg(
name=self.name,
first_field=self.first_field,
op=self.op,
second_field=self.second_field,
verbose=self._verbose,
)
return msg

def report(self):
return (self.name, self.total, self.failures)
Expand All @@ -210,13 +209,13 @@ def __init__(self, constraints: List[ValueConstraint] = []):
@staticmethod
def from_protobuf(msg: ValueConstraintMsgs) -> 'ValueConstraints':
v = [ValueConstraint.from_protobuf(c) for c in msg.constraints]
if len(v) > 0:
if v:
return ValueConstraints(v)
return None

def to_protobuf(self) -> ValueConstraintMsgs:
v = [c.to_protobuf() for c in self.constraints]
if len(v) > 0:
if v:
vcmsg = ValueConstraintMsgs()
vcmsg.constraints.extend(v)
return vcmsg
Expand All @@ -228,7 +227,7 @@ def update(self, v):

def report(self) -> List[tuple]:
v = [c.report() for c in self.constraints]
if len(v) > 0:
if v:
return v
return None

Expand All @@ -240,13 +239,13 @@ def __init__(self, constraints: List[SummaryConstraint]):
@staticmethod
def from_protobuf(msg: SummaryConstraintMsgs) -> 'SummaryConstraints':
v = [SummaryConstraint.from_protobuf(c) for c in msg.constraints]
if len(v) > 0:
if v:
return SummaryConstraints(v)
return None

def to_protobuf(self) -> SummaryConstraintMsgs:
v = [c.to_protobuf() for c in self.constraints]
if len(v) > 0:
if v:
scmsg = SummaryConstraintMsgs()
scmsg.constraints.extend(v)
return scmsg
Expand All @@ -258,7 +257,7 @@ def update(self, v):

def report(self) -> List[tuple]:
v = [c.report() for c in self.constraints]
if len(v) > 0:
if v:
return v
return None

Expand All @@ -278,7 +277,7 @@ def __init__(self,
value_constraints[k] = ValueConstraints(v)
self.value_constraint_map = value_constraints
if summary_constraints is None:
summary_constraints = dict()
summary_constraints = {}
for k, v in summary_constraints.items():
if isinstance(v, list):
summary_constraints[k] = SummaryConstraints(v)
Expand Down
3 changes: 1 addition & 2 deletions src/whylogs/core/statistics/numbertracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,7 @@ def to_protobuf(self):
opts["doubles"] = self.floats.to_protobuf()
elif self.ints.count > 0:
opts["longs"] = self.ints.to_protobuf()
msg = NumbersMessage(**opts)
return msg
return NumbersMessage(**opts)

@staticmethod
def from_protobuf(message: NumbersMessage):
Expand Down
14 changes: 9 additions & 5 deletions src/whylogs/core/statistics/schematracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def infer_type(self):

# Integral is considered a subset of fractional here
fractional_count = sum(
[type_counts.get(k, 0) for k in (Type.INTEGRAL, Type.FRACTIONAL)]
type_counts.get(k, 0) for k in (Type.INTEGRAL, Type.FRACTIONAL)
)

if (
Expand All @@ -82,11 +82,15 @@ def infer_type(self):
):
# treat everything else as "String" except UNKNOWN
coerced_count = sum(
[
type_counts.get(k, 0)
for k in (Type.INTEGRAL, Type.FRACTIONAL, Type.STRING, Type.BOOLEAN)
]
type_counts.get(k, 0)
for k in (
Type.INTEGRAL,
Type.FRACTIONAL,
Type.STRING,
Type.BOOLEAN,
)
)

actual_ratio = float(coerced_count) / total_count
return InferredType(type=Type.STRING, ratio=actual_ratio)

Expand Down
5 changes: 1 addition & 4 deletions src/whylogs/core/statistics/thetasketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@ class ThetaSketch:
def __init__(self, theta_sketch=None, union=None, compact_theta=None):
if theta_sketch is None:
theta_sketch = datasketches.update_theta_sketch()
if union is None:
union = datasketches.theta_union()
else:
union = _copy_union(union)
union = datasketches.theta_union() if union is None else _copy_union(union)
if compact_theta is not None:
union.update(compact_theta)

Expand Down
3 changes: 1 addition & 2 deletions src/whylogs/features/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,7 @@ def __call__(self, img: Union[ImageType, np.ndarray]) -> float:
img = img.convert("RGB")
img = img.filter(ImageFilter.Kernel((3, 3), (-1, -1, -1, -1, 8,
-1, -1, -1, -1), 1, 0))
value = np.var(np.array(img).flatten()).reshape((-1, 1))
return value
return np.var(np.array(img).flatten()).reshape((-1, 1))

def __repr__(self):
return self.__class__.__name__
2 changes: 1 addition & 1 deletion src/whylogs/io/file_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def file_loader(path: str, valid_file: Callable[[str], bool] = valid_file) -> An
if ext in IMAGE_EXTENSIONS:
data, file_format = image_loader(path)
return (data, magic_data), file_format
elif ((ext == ".json") or (ext == ".jsonl")):
elif ext in [".json", ".jsonl"]:
data, file_format = json_loader(path)
return (data, magic_data), file_format
elif (ext == ".csv"):
Expand Down
Loading

0 comments on commit 490cce5

Please sign in to comment.