Skip to content

Commit

Permalink
Merge add305d into 865d24a
Browse files Browse the repository at this point in the history
  • Loading branch information
lalmei committed Feb 2, 2021
2 parents 865d24a + add305d commit 750a41b
Show file tree
Hide file tree
Showing 32 changed files with 927 additions and 72 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.1.13-dev8
current_version = 0.2.0-dev0
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def setup(app):
# built documents.
#
# The short X.Y version.
version = "0.1.13-dev8"
version = "0.2.0-dev0"
# The full version, including alpha/beta/rc tags.
release = "" # Is set by calling `setup.py docs`

Expand Down
8 changes: 6 additions & 2 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ nodeenv==1.5.0
numpy==1.19.4
oauthlib==3.1.0
packaging==20.7
pandas==1.1.4
pandas==1.2.1
parso==0.7.1
pathspec==0.8.1
pexpect==4.8.0
Expand Down Expand Up @@ -115,4 +115,8 @@ websocket-client==0.57.0
Werkzeug==1.0.1
whylabs-datasketches==2.0.0b7
zipp==3.4.0
smart-open==4.1.2
puremagic == 1.10
xlrd==2.0.1
openpyxl==3.0.6
tqdm==4.54.0
smart-open==4.1.2
9 changes: 7 additions & 2 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ fsspec==0.8.5
jmespath==0.10.0
marshmallow==3.10.0
matplotlib==3.3.3
numpy>=1.19.4,<=1.20.0
pandas==1.1.4
numpy==1.19.4
pandas==1.1.4;python_version < '3.7'
pandas==1.2.1;python_version >= '3.7'
protobuf==4.0.0rc2
python-dateutil==2.8.1
pytz==2020.5
Expand All @@ -25,4 +26,8 @@ Pillow==8.1.0
moto==1.3.16
pytest-cov>=2.11.1
mlflow==1.13.1
puremagic ==1.10
xlrd==2.0.1
openpyxl==3.0.6
smart-open==4.1.2
tqdm==4.54.0
6 changes: 5 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,8 @@ s3transfer==0.3.3
six==1.15.0
urllib3==1.26.3
whylabs-datasketches==2.0.0b7
smart-open==4.1.2
xlrd==2.0.1
openpyxl==3.0.6
puremagic == 1.10
smart-open==4.1.2
tqdm==4.54.0
10 changes: 8 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

[metadata]
name = whylogs
version = 0.1.13-dev8
version = 0.2.0-dev0
description = Profile and monitor your ML data pipeline end-to-end
author = WhyLabs.ai
author-email = support@whylabs.ai
Expand Down Expand Up @@ -44,10 +44,16 @@ install_requires =
botocore>=1.17.44
smart-open==4.1.2
# very important: s3fs pulls in aiobotocore, which locks boto3
xlrd==2.0.1
openpyxl==3.0.6
puremagic == 1.10
tqdm==4.54.0

setup_requires =
pytest-runner
setuptools


# The usage of test_requires is discouraged, see `Dependency Management` docs
# tests_require = pytest; pytest-cov
# Require a specific Python version, e.g. Python 2.7 or >= 3.4
Expand Down Expand Up @@ -167,4 +173,4 @@ exclude_lines =
# Don't complain if non-runnable code isn't run:
if 0:
if __name__ == .__main__.:
fail_under = 50
fail_under = 70
2 changes: 1 addition & 1 deletion src/whylogs/_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""WhyLabs version number."""

__version__ = "0.1.13-dev8"
__version__ = "0.2.0-dev0"
107 changes: 89 additions & 18 deletions src/whylogs/app/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,26 @@
import datetime
import hashlib
import json
from pathlib import Path
import logging
from typing import List, Optional, Dict, Union, Callable, AnyStr
from typing.io import IO
from pathlib import Path

from tqdm import tqdm
import pandas as pd
from typing.io import IO

from whylogs.app.writers import Writer
from whylogs.core import DatasetProfile, TrackImage, METADATA_DEFAULT_ATTRIBUTES
from whylogs.core import DatasetProfile, TrackImage, METADATA_DEFAULT_ATTRIBUTES, TrackBB
from whylogs.io import LocalDataset

TIME_ROTATION_VALUES = ["s", "m", "h", "d"]

# TODO upgrade to Classes
SegmentTag = Dict[str, any]
Segment = List[SegmentTag]

logger = logging.getLogger(__name__)


class Logger:
"""
Expand Down Expand Up @@ -99,6 +104,15 @@ def profile(self, ) -> DatasetProfile:
"""
return self._profiles[-1]["full_profile"]

def tracking_checks(self):
    """
    Run the checks that the ``log*`` methods perform before tracking data.

    When the logger is active and ``should_rotate()`` reports that a
    rotation is due, ``_rotate_time()`` is called before returning.

    :return: ``False`` when the logger is not active (callers should skip
        logging), ``True`` otherwise.
    """
    if self._active:
        # Rotate first so whatever is logged next lands in the fresh profile.
        if self.should_rotate():
            self._rotate_time()
        return True
    return False

@property
def segmented_profiles(self, ) -> Dict[str, DatasetProfile]:
"""
Expand Down Expand Up @@ -287,12 +301,9 @@ def log(
:param value: value of a single feature. Cannot be specified if 'features' is specified
"""
if not self._active:
if not self.tracking_checks():
return None

if self.should_rotate():
self._rotate_time()

if features is None and feature_name is None:
return

Expand Down Expand Up @@ -341,10 +352,8 @@ def log_image(self,
:param feature_transforms: a list of callables to transform the input into metrics
:type image: Union[str, PIL.image]
"""
if not self._active:
return
if self.should_rotate():
self._rotate_time()
if not self.tracking_checks():
return None

if isinstance(image, str):
track_image = TrackImage(image, feature_transforms=feature_transforms,
Expand All @@ -355,6 +364,73 @@ def log_image(self,

track_image(self._profiles[-1]["full_profile"])

def log_local_dataset(self, root_dir, folder_feature_name="folder_feature", image_feature_transforms=None, show_progress=False):
    """
    Log a local folder dataset

    It will log data from the files, along with structure file data like
    metadata, and magic numbers. If the folder has single layer for children
    folders, this will pick up folder names as a segmented feature

    For every entry of the dataset the file format, the folder value and the
    magic-number data are logged as features; the file content is then
    dispatched on its type: dataframes go to ``log_dataframe``, dicts/lists
    to ``log_annotation`` and PIL images to ``log_image``.

    Args:
        root_dir (str): directory where dataset is located.
        folder_feature_name (str, optional): Name for the subfolder features, i.e. class, store etc.
        image_feature_transforms (None, optional): image transforms that you would like to use with
            the image log; when empty/None ``log_image`` is called without ``feature_transforms``.
        show_progress (bool, optional): display a tqdm progress bar while iterating over the dataset.

    Raises:
        NotImplementedError: if the content of a file is of a type that cannot be logged
            (this includes image data when Pillow is not installed).
    """
    try:
        from PIL.Image import Image as ImageType
    except ImportError as e:
        # Pillow is optional: remember that image support is unavailable.
        ImageType = None
        logger.debug(str(e))
        logger.debug(
            "Unable to load PIL; install Pillow for image support")

    dst = LocalDataset(root_dir)
    for idx in tqdm(range(len(dst)), disable=(not show_progress)):
        # load internal and metadata from the next file
        ((data, magic_data), fmt), segment_value = dst[idx]

        # log magic number data if any, fmt, and folder name.
        self.log(feature_name="file_format", value=fmt)
        self.log(feature_name=folder_feature_name, value=segment_value)
        self.log(features=magic_data)

        if isinstance(data, pd.DataFrame):
            self.log_dataframe(data)
        elif isinstance(data, (dict, list)):
            self.log_annotation(annotation_data=data)
        # isinstance() raises TypeError when its second argument is None, so
        # only test against ImageType when Pillow was actually imported.
        elif ImageType is not None and isinstance(data, ImageType):
            if image_feature_transforms:
                self.log_image(
                    data, feature_transforms=image_feature_transforms, metadata_attributes=[])
            else:
                self.log_image(
                    data, metadata_attributes=[])
        else:
            raise NotImplementedError(
                "File format not supported {}, format:{}".format(type(data), fmt))

def log_annotation(self, annotation_data):
    """
    Log structured annotation data, i.e. JSON-like structures.

    Nothing is tracked when ``tracking_checks()`` returns a falsy value.

    Args:
        annotation_data (Dict or List): annotation structure that is handed
            to ``TrackBB`` and applied to the latest full profile.

    Returns:
        None
    """
    if self.tracking_checks():
        tracker = TrackBB(obj=annotation_data)
        latest = self._profiles[-1]
        tracker(latest["full_profile"])
    return None

def log_csv(self,
filepath_or_buffer: Union[str, Path, IO[AnyStr]],
segments: Optional[Union[List[Segment], List[str]]] = None,
Expand All @@ -369,10 +445,6 @@ def log_csv(self,
:param profile_full_dataset: when segmenting dataset, an option to keep the full unsegmented profile of the
dataset.
"""
if not self._active:
return
if self.should_rotate():
self._rotate_time()

self.profile_full_dataset = profile_full_dataset
if segments is not None:
Expand All @@ -392,10 +464,8 @@ def log_dataframe(self, df,
:param segments: specify the tag key value pairs for segments
:param df: the Pandas dataframe to log
"""
if not self._active:
if not self.tracking_checks():
return None
if self.should_rotate():
self._rotate_time()

# segment check in case segments are just keys
self.profile_full_dataset = profile_full_dataset
Expand Down Expand Up @@ -459,6 +529,7 @@ def log_df_segment(self, df, segment: Segment):
segment = sorted(segment, key=lambda x: x["key"])

segment_profile = self.get_segment(segment)

if segment_profile is None:
segment_profile = DatasetProfile(
self.dataset_name,
Expand Down
3 changes: 3 additions & 0 deletions src/whylogs/app/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ def logger(

return logger

def get_logger(self, dataset_name: str = None):
    """
    Return the entry stored in ``self._loggers`` for ``dataset_name``.

    :param dataset_name: key to look up in ``self._loggers``
    :return: the stored value, or ``None`` when there is no entry for that key
    """
    known_loggers = self._loggers
    return known_loggers.get(dataset_name)

def log_dataframe(
self,
df: pd.DataFrame,
Expand Down
3 changes: 3 additions & 0 deletions src/whylogs/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from .columnprofile import ColumnProfile
from .datasetprofile import DatasetProfile
from .image_profiling import TrackImage, _METADATA_DEFAULT_ATTRIBUTES as METADATA_DEFAULT_ATTRIBUTES
from .annotation_profiling import TrackBB, BB_ATTRIBUTES

# Public API of the package. Python only honours the lowercase ``__all__``
# name, and its entries must be the *names* (strings) of the exported
# objects, not the objects themselves.
__all__ = [
    "ColumnProfile",
    "DatasetProfile",
    "TrackImage",
    "METADATA_DEFAULT_ATTRIBUTES",
    "TrackBB",
    "BB_ATTRIBUTES",
]

0 comments on commit 750a41b

Please sign in to comment.