Skip to content

Commit

Permalink
Merge 2348566 into ab99be3
Browse files Browse the repository at this point in the history
  • Loading branch information
sourcery-ai[bot] committed Mar 25, 2021
2 parents ab99be3 + 2348566 commit 9c3668c
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 32 deletions.
22 changes: 22 additions & 0 deletions pydoc-markdown.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
loaders:
- type: python
search_path: [./src]
processors:
- type: filter
skip_empty_modules: true
exclude_private: true
document_only: false
- type: smart
- type: google
- type: crossref
renderer:
type: docusaurus
docs_base_path: docusaurus
relative_output_path: whylogs-api
relative_sidebar_path: sidebar.json
sidebar_top_level_label: 'Reference'
signature_in_header: true
markdown:
render_toc: true
escape_html_in_docstring: true

54 changes: 22 additions & 32 deletions src/whylogs/app/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,14 @@ class Logger:
:param session_timestamp: Optional. The time the session was created
:param tags: Optional. Dictionary of key, value for aggregating data upstream
:param metadata: Optional. Dictionary of key, value. Useful for debugging (associated with every single dataset profile)
:param writers: List of Writer objects used to write out the data
:param with_rotation_time: Whether to rotate with time, takes values of overall rotation interval,
"s" for seconds
"m" for minutes
"h" for hours
"d" for days
:param interval: Additional time rotation multiplier.
:param verbose: enable debug logging or not
:param cache_size: set how many dataprofiles to cache
:param segments: define either a list of segment keys or a list of segments tags: [ {"key":<featurename>,"value": <featurevalue>},... ]
:param writers: Optional. List of Writer objects used to write out the data
:param with_rotation_time: Optional. Combined with `interval` to create new output logs at regular intervals, \
"s" for seconds, "m" for minutes, "h" for hours, "d" for days \
Output filenames will have a suffix reflecting the rotation interval.
:param interval: Rotation interval multiplier, defaults to 1.
:param verbose: enable debug logging
:param cache_size: dataprofiles to cache
:param segments: define either a list of segment keys or a list of segments tags: [ {"key":<featurename>,"value": <featurevalue>},... ]
:param profile_full_dataset: when segmenting dataset, an option to keep the full unsegmented profile of the dataset.
:param constraints: static assertions to be applied to streams and summaries.
"""
Expand Down Expand Up @@ -127,9 +125,8 @@ def segmented_profiles(self, ) -> Dict[str, DatasetProfile]:

def get_segment(self, segment: Segment) -> Optional[DatasetProfile]:
hashed_seg = hash_segment(segment)
segment_profile = self._profiles[-1]["segmented_profiles"].get(
return self._profiles[-1]["segmented_profiles"].get(
hashed_seg, None)
return segment_profile

def set_segments(self, segments: Union[List[Segment], List[str]]) -> None:
if segments:
Expand Down Expand Up @@ -168,19 +165,15 @@ def _set_rotation(self, with_rotation_time: str = None):
if self.with_rotation_time == 's':
interval = 1 # one second
self.suffix = "%Y-%m-%d_%H-%M-%S"
self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(\.\w+)?$"
elif self.with_rotation_time == 'm':
interval = 60 # one minute
self.suffix = "%Y-%m-%d_%H-%M"
self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}(\.\w+)?$"
elif self.with_rotation_time == 'h':
interval = 60 * 60 # one hour
self.suffix = "%Y-%m-%d_%H"
self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}(\.\w+)?$"
elif self.with_rotation_time == 'd':
interval = 60 * 60 * 24 # one day
self.suffix = "%Y-%m-%d"
self.extMatch = r"^\d{4}-\d{2}-\d{2}(\.\w+)?$"
else:
raise TypeError("Invalid choice of rotation time, valid choices are {}".format(
TIME_ROTATION_VALUES))
Expand All @@ -190,18 +183,15 @@ def _set_rotation(self, with_rotation_time: str = None):
self.rotate_at = self.rotate_when(current_time)

def rotate_when(self, time):
result = time + self.interval
return result
return time + self.interval

def should_rotate(self, ):

if self.with_rotation_time is None:
return False

current_time = int(datetime.datetime.utcnow().timestamp())
if current_time >= self.rotate_at:
return True
return False
return current_time >= self.rotate_at

def _rotate_time(self):
"""
Expand All @@ -216,7 +206,7 @@ def _rotate_time(self):
time_tuple.strftime(self.suffix), self.suffix)

# modify the segment datetime stamps
if (self.segments is None) or ((self.segments is not None) and self.profile_full_dataset):
if self.segments is None or self.profile_full_dataset:
self._profiles[-1]["full_profile"].dataset_timestamp = log_datetime
if self.segments is not None:
for _, each_prof in self._profiles[-1]["segmented_profiles"].items():
Expand Down Expand Up @@ -259,11 +249,10 @@ def flush(self, rotation_suffix: str = None):
for hashseg, each_seg_prof in self._profiles[-1]["segmented_profiles"].items():
seg_suffix = hashseg
full_suffix = "_" + seg_suffix
if rotation_suffix is None:
writer.write(each_seg_prof, full_suffix)
else:
if rotation_suffix is not None:
full_suffix += rotation_suffix
writer.write(each_seg_prof, full_suffix)

writer.write(each_seg_prof, full_suffix)

def full_profile_check(self, ) -> bool:
"""
Expand Down Expand Up @@ -417,7 +406,7 @@ def log_local_dataset(self, root_dir, folder_feature_name="folder_feature", imag
if isinstance(data, pd.DataFrame):
self.log_dataframe(data)

elif isinstance(data, Dict) or isinstance(data, list):
elif isinstance(data, (Dict, list)):
self.log_annotation(annotation_data=data)
elif isinstance(data, ImageType):
if image_feature_transforms:
Expand Down Expand Up @@ -512,10 +501,11 @@ def log_segments_keys(self, data):
for each_segment in segments:
try:
segment_df = grouped_data.get_group(each_segment)
segment_tags = []
for i in range(len(self.segments)):
segment_tags.append(
{"key": self.segments[i], "value": each_segment[i]})
segment_tags = [
{"key": self.segments[i], "value": each_segment[i]}
for i in range(len(self.segments))
]

self.log_df_segment(segment_df, segment_tags)
except KeyError:
continue
Expand All @@ -526,7 +516,7 @@ def log_fixed_segments(self, data):
for segment_tag in self.segments:
# create keys
segment_keys = [feature["key"] for feature in segment_tag]
seg = tuple([feature["value"] for feature in segment_tag])
seg = tuple(feature["value"] for feature in segment_tag)

grouped_data = data.groupby(segment_keys)

Expand Down

0 comments on commit 9c3668c

Please sign in to comment.